/*
    This file is a part of ficus language project.
    See ficus/LICENSE for the licensing terms
*/

/*
    Converts K-form into C-form (see c_form.ml).
    We assume that K-form has been prepared for the conversion,
    i.e. lambda lifting has been done, all the complex
    data structures, such as records, tuple, lists etc. have
    been converted to KTypName(...) etc.

    The algorithm:
        1. do the 1st pass through the K-form, convert all the types and exceptions (see c_gen_types.ml).
        2. do the 2nd pass through the K-form, generate headers for all the functions (see c_gen_fdecls.ml):
            - each function, except for the constructors, gets extra "free variables"/"closure data"
              parameter fx_fv, even if it's not used.
            - the return value becomes the output parameter fx_result (before the closure pointer), unless
              the function is "nothrow" and the return type is scalar.
            - the non-"nothrow" function gets "int" return value, the status.
              "nothrow" functions return the result or return "void".
        3. do the 3rd pass through the K-form:
            - convert body of each function to C-form
            - put all the global calculations to a dedicated function fx_toplevel();
              all the intermediate values, used at the global scope, are moved to this function as well.
            - collect all the global c-code fragments into a dedicated list
        4. output the final C code as concatenation of the following parts (in this particular order):
            - preamble (some general comment about autogenerated file + #include "ficus/ficus.h")
            - all the global c code
            - forward declarations of some types and their destructors
            - declarations of all the types and their helper functions (constructors, destructors, copy operators)
            - declarations of all the exceptions and their constructors
            - declarations of the global variables together with their default initializers.
            - forward declarations of some functions
            - declarations and definitions of all the functions
            - the function "int fx_toplevel(void) {...}"
            - the function main() that initializes runtime, calls fx_toplevel() and returns its value.

    The K-form to transform is a recursive tree of expressions.
    So we call kexp2cexp recursively to convert K expression to C code. The function
    takes kexp_t on input and returns a pair of (cexp_t, cstmt_t list) on output.
    The first output value is equivalent C expression or name of variable that stores it.
    The second output value is reversed list of C statements, new statements are added into the beginning,
    because it's an O(1) operation. In the end of each block we reverse the list and form block statement
    (loop body, if branch, function body etc.)

    We maintain and recursively pass the following compiler state:
    - C code generated so far. Just like in k_normalize.ml, this is the reverse list of c statements.
      It's passed as input to many code generation functions that update it and return.
    - The "catch" label stack. In the beginning of new function we push the default "cleanup" label
      for that function and pop in the end. We also push-pop a new label when we translate
      try {} catch {} block. In principle, we can use dedicated catch labels in some other nested statements,
      like for(), if() etc. For simplicity we do not do that, at the expense of slightly higher
      overhead imposed by the single big cleanup section per function. After we employ the inline function
      call optimization, the relative overhead of the cleanup sections should decrease.
      In the catch blocks we jump to the underlying label in the stack,
      thus providing the exception propagation mechanism.
    - the map of id->cexp_t pairs (i2e); not passed as argument, but maintained separately.
      It does not make sense to represent all the values in K form as C variables.
      If the value (id) is:
      1. used just once
      2. immutable (val, not var)
      3. a result of pure expression, which operands are also values
      then the id can be just replaced with the expression in C-form.
    - the stack of scopes where we need to do some cleanup:

      global -> [ fun -> ] [ try -> ] [ for/while/do-while ->] [ match case ->] ...

    - reference to the destination expression, or ref None.
        - when the translated kexp_t has type KTypVoid, no output value is needed.
        - in some other cases, there is no pre-defined target for expression.
          Then we need to generate a new temporary value and store the result there.
          Or even postpone the temporary value assignment and put the pair (id, cexp) to the map.
        - in some cases we are given the pre-defined expression target.
        - for example, the function body expression result should be stored in fx_result.
        - When we have K-form code "var x=val0; ... x=new_val;", new_val should be stored in x,
          no need to generate a temporary value, because we want to avoid unnecessary data copying.
          So, we pass x as the target location when we convert `new_val` to C code.
        - "record.f = new_val", "arr[idx] += newval" etc. are other such cases. In those cases we
          use a TEMP_REF val that is the pointer to the proper record field or array element.
          However, in those cases we will try to generate idiomatic C code, e.g. "record.f = new_val",
          not the direct replication of K-form: "sometype* pf = &record.f; *pf = new_val"
*/

from Ast import *
from K_form import *
from C_form import *
import K_remove_unused, K_annotate, K_mangle, K_pp
import C_gen_types, C_gen_fdecls, C_pp

import Map, Set, Hashmap, Hashset

/* Hash map from a value id to an integer counter; find_single_use_vals() uses it
   to count how many times each candidate value is referenced. */
type count_map_t = (id_t, int) Hashmap.t

/* Finds a set of immutable values that can potentially be replaced
   with the expressions that they are initialized with, e.g.

   val b = a + 1; foo(b)
   can be replaced with foo(a+1).

   Here we find a superset of such values, i.e. a set of values
   that are pure expressions and used just once.
   If a value is used more than once, it makes
   sense to store it in a temporary variable.

   Later on we shrink this superset. We check that their initialization
   expressions can be represented as scalar C expressions.
   This is a recursive rule that cannot be checked at this stage.
*/
/* Scans 'topcode' and returns the hash set of ids of the values that
   (a) are defined by KDefVal with the 'temp' or 'tempref' flag set,
   (b) are initialized with a pure expression (K_remove_unused.pure_kexp) and
   (c) have been counted exactly once as an atom after their definition. */
fun find_single_use_vals(topcode: kcode_t)
{
    /* id -> number of counted references; the map is created with 0 as the default data */
    var count_map: count_map_t = Hashmap.empty(1024, noid, 0)
    /* ids of the candidate values (conditions (a) and (b) above) */
    var decl_const_vals = empty_id_hashset(1024)

    /* atom callback: a reference is counted only if the id has been
       registered as a candidate already; literals are ignored */
    fun count_atom(a: atom_t, loc: loc_t, callb: k_fold_callb_t) =
        match a {
        | AtomId i =>
            if decl_const_vals.mem(i) {
                val idx = count_map.find_idx_or_insert(i)
                count_map.table[idx].data += 1
            }
        | _ => {}
        }
    /* type callback: does nothing */
    fun count_ktyp(t: ktyp_t, loc: loc_t, callb: k_fold_callb_t) {}
    /* expression callback: registers the candidates and counts the references */
    fun count_kexp(e: kexp_t, callb: k_fold_callb_t) =
        match e {
        | KDefVal (k, e1, loc) =>
            val {kv_flags} = get_kval(k, loc)
            /* We only replace those values with expressions which are temporary
                and computed using pure expressions */
            val good_temp = kv_flags.val_flag_tempref || kv_flags.val_flag_temp
            if good_temp && K_remove_unused.pure_kexp(e1) {
                decl_const_vals.add(k)
            }
            /* only the initializer is processed further */
            count_kexp(e1, callb)
        | KExpCall (f, _, (_, loc)) =>
            /* count f twice to make sure it will not be included into u1vals, because if
               f is a function pointer, then in C the call will be converted to
               `f.fp(args, f.fcv)`, i.e. f is used twice here, so we need to save it anyway */
            count_atom(AtomId(f), loc, callb)
            fold_kexp(e, callb)
        | KExpICall(obj, _, _, (_, loc)) =>
            /* count obj twice, since the virtual call is 'obj->vtbl[meth_idx](obj, ...)',
               i.e. obj is referenced twice */
            count_atom(AtomId(obj), loc, callb)
            fold_kexp(e, callb)
        | _ => fold_kexp(e, callb)
        }

    val count_callb = k_fold_callb_t
    {
        kcb_fold_ktyp=Some(count_ktyp),
        kcb_fold_kexp=Some(count_kexp),
        kcb_fold_atom=Some(count_atom)
    }
    for e <- topcode { count_kexp(e, count_callb) }

    /* keep only the candidates whose counter is exactly 1 */
    val u1_vals = empty_id_hashset(decl_const_vals.size())
    decl_const_vals.app(fun (i) {
        match count_map.find_opt(i) {
        | Some 1 => u1_vals.add(i)
        | _ => {}
        }})
    u1_vals
}

/* utility function that helps to find some loop or other complex expression invariants.
   if "i0" is temp ref, i.e. a pointer to some part of a complex data type,
   its contents can be implicitly modified via a variable/value with different name.
   so we always return true, i.e. "i0" cannot be considered a constant w.r.t "e"
*/
/* Returns true if 'i0' is a value with the 'tempref' flag set (in this case 'e' is not scanned at all)
   or if the atom 'AtomId i0' occurs somewhere inside the expression 'e'. */
fun occurs_id_kexp(i0: id_t, e: kexp_t): bool
{
    /* set to true as soon as i0 is found */
    var id_occurs = false
    fun occurs_atom(a: atom_t, loc: loc_t, callb: k_fold_callb_t) =
    match a {
    | AtomId i => if i == i0 { id_occurs = true }
    | _ => {}
    }

    /* types are not inspected */
    fun occurs_ktyp(t: ktyp_t, loc: loc_t, callb: k_fold_callb_t) {}
    /* stop descending into sub-expressions once the id has been found */
    fun occurs_kexp(e: kexp_t, callb: k_fold_callb_t) = if !id_occurs { fold_kexp(e, callb) }

    val loc = get_kexp_loc(e)
    /* f is true when i0 is a temporary reference */
    val f = match kinfo_(i0, loc) {
            | KVal ({kv_flags}) => kv_flags.val_flag_tempref
            | _ => false
            }
    f || ({
        val occurs_callb = k_fold_callb_t {
            kcb_fold_ktyp=Some(occurs_ktyp),
            kcb_fold_kexp=Some(occurs_kexp),
            kcb_fold_atom=Some(occurs_atom)
        }
        occurs_kexp(e, occurs_callb)
        id_occurs })
}

/* The kind of a block context stored in the block stack.
   - BlockKind_Global and BlockKind_Fun (which carries the function id) contexts
     get a "cleanup" label, all the other kinds get a "catch" label (see new_block_ctx_).
   - BlockKind_Loop is used for loops with ndims == 1, BlockKind_LoopND for all the other
     loops (see new_for_block_ctx); only BlockKind_LoopND gets a separate "break" label.
   - BlockKind_Block, BlockKind_Try and BlockKind_Case are presumably used for plain blocks,
     try-blocks and match cases, respectively (their use is outside of this part of the file). */
type block_kind_t =
    | BlockKind_Global
    | BlockKind_Fun: id_t
    | BlockKind_Block
    | BlockKind_Try
    | BlockKind_Loop
    | BlockKind_LoopND
    | BlockKind_Case


/* The state of a single entry of the block stack:
   - bctx_kind: the kind of the block.
   - bctx_label: the "cleanup"/"catch" label of the block, created in new_block_ctx_.
   - bctx_br_label: the extra "break" label; noid unless the kind is BlockKind_LoopND.
   - bctx_for_flags: the loop flags (default_for_flags() for non-loop blocks).
   - bctx_status, bctx_par_status: status expressions set by new_for_block_ctx and passed to
     FX_CHECK_EXN_PARALLEL for parallel loops; dummy expressions otherwise.
   - bctx_prologue: declarations of the locals that need a destructor (see add_local).
   - bctx_cleanup: the code that frees those locals (see add_local).
   - bctx_break_used, bctx_continue_used: the counters updated by check_inside_loop.
   - bctx_return_used: not updated in this part of the file; presumably counts 'return' statements.
   - bctx_label_used: the number of generated references to bctx_label. */
type block_ctx_t =
{
    bctx_kind: block_kind_t;
    bctx_label: id_t;
    bctx_br_label: id_t;
    bctx_for_flags: for_flags_t;
    bctx_status: cexp_t;
    bctx_par_status: cexp_t;
    bctx_prologue: cstmt_t list;
    bctx_cleanup: cstmt_t list;
    bctx_break_used: int;
    bctx_continue_used: int;
    bctx_return_used: int;
    bctx_label_used: int
}

/* Produces the fixed preamble of a generated C file: the "autogenerated" remark
   followed by the inclusion of the ficus runtime header.
   The first (boolean) parameter is ignored. */
fun gen_ccode_prologue(_: bool, loc: loc_t)
{
    val preamble_text =
        "// this is autogenerated file, do not edit it.\n" + "#include \"ficus/ficus.h\"\n"
    val preamble_stmt = CExp(CExpCCode(preamble_text, loc))
    [:: preamble_stmt]
}

/* Generates the C code of main() as a single inline C fragment;
   returns the empty list when 'ismain' is false.

   For each module name m in 'mod_names':
   - 'fx_init_{m}' and 'fx_deinit_{m}' are forward-declared, except for the module with index 0;
   - a guarded 'fx_init_{m}()' call and a 'fx_deinit_{m}()' call are generated.
   All three lists are built by prepending. The list of init calls is joined as is,
   so the init calls are emitted in the reverse order of 'mod_names', whereas the list of
   deinit calls is reversed back, so the deinit calls are emitted in the order of 'mod_names'. */
fun gen_main(ismain: bool, mod_names: string list, loc: loc_t) =
    if !ismain { [] } else {
        val (fwd_decls, init_calls, deinit_calls) =
        fold fwd_decls = [], init_calls = [], deinit_calls = [] for m@idx <- mod_names {
            (if idx == 0 { fwd_decls }
            else { f"FX_EXTERN_C int fx_init_{m}();\nFX_EXTERN_C void fx_deinit_{m}();\n" :: fwd_decls },
            f"  if (fx_status >= 0) fx_status = fx_init_{m}();\n" :: init_calls,
            f"  fx_deinit_{m}();\n" :: deinit_calls)
        }
        [:: CExp(CExpCCode(
            "".join(fwd_decls) + "
int main(int argc, char** argv)
{
   fx_init(argc, argv);
   int fx_status = FX_OK;
" +
            "".join(init_calls) +
"  if (fx_status < 0) fx_status = fx_print_bt();
" +
            "".join(deinit_calls.rev()) +
"  return fx_deinit(fx_status);
}",
            loc))]
    }

/* Hash map from an id to the C expression that represents it (the type of the 'i2e' map). */
type cexp_map_t = (id_t, cexp_t) Hashmap.t

fun gen_ccode(cmods: cmodule_t list, kmod: kmodule_t, c_fdecls: ccode_t, mod_init_calls: ccode_t)
{
    /* The code generation state of the module; it is shared by the nested functions below.
       NOTE(review): several of these variables (top_inline_ccode, glob_data_ccode, module_cleanup,
       for_letters, return_used, func_dstexp_r, mod_sc) are only used past the visible part of the file. */
    val {km_name, km_idx, km_cname, km_top, km_main} = kmod
    /* the module index; used to generate new ids */
    val cm_idx = km_idx
    val mod_sc = [:: ScModule(km_idx)]
    var top_code = km_top
    var top_inline_ccode: ccode_t = []
    /* forward declarations of symbols; see ensure_sym_is_defined_or_declared() */
    var fwd_fdecls: ccode_t = []
    var glob_data_ccode: ccode_t = []
    var module_cleanup: ccode_t = []
    /* the symbols that have been registered by ensure_sym_is_defined_or_declared() already */
    var defined_syms = empty_id_hashset(1024)
    /* id -> C expression map; CExpTyp(CTypInt, noloc) is presumably just a placeholder default value */
    var i2e: cexp_map_t = Hashmap.empty(1024, noid, CExpTyp(CTypInt, noloc))
    /* the single-use pure temporary values of the module */
    val u1vals = find_single_use_vals(top_code)
    /* the stack of block contexts; the innermost block is the head of the list */
    var block_stack: block_ctx_t ref list = []
    val for_letters = [:: "i", "j", "k", "l", "m" ]
    /* the id of "fx_status"; see make_fx_status() */
    val fx_status_ = get_id("fx_status")
    var return_used = 0
    var func_dstexp_r: cexp_t? ref = ref None

    /* Creates a new C label and returns its id.
       The "_fx_" prefix is added to 'basename' unless it is there already.
       Only the "_fx_cleanup" label gets a fixed C name; for all the other
       labels the C name is left empty. */
    fun make_label(basename: string, loc: loc_t)
    {
        val prefixed =
            if basename.startswith("_fx_") { basename } else { "_fx_" + basename }
        val label_id = gen_idc(cm_idx, prefixed)
        val fixed_cname = if prefixed == "_fx_cleanup" { prefixed } else { "" }
        val label_info = cdeflabel_t {cl_name=label_id, cl_cname=fixed_cname, cl_loc=loc}
        set_idc_entry(label_id, CLabel(label_info))
        label_id
    }

    /* Returns the C expression that refers to 'fx_status', typed as C int. */
    fun make_fx_status(loc: loc_t)
    {
        make_id_t_exp(fx_status_, CTypCInt, loc)
    }

    /* Registers the symbol 'f' as known. When it is seen for the first time,
       a forward declaration of the symbol is put into 'fwd_fdecls'. */
    fun ensure_sym_is_defined_or_declared(f: id_t, loc: loc_t)
    {
        val already_known = defined_syms.mem(f)
        if !already_known {
            defined_syms.add(f)
            val fwd_decl = CDefForwardSym(f, loc)
            fwd_fdecls = fwd_decl :: fwd_fdecls
        }
    }

    /* Checks that 'break' (is_break=true) or 'continue' (is_break=false) at 'loc' is used
       inside a loop and increments the corresponding usage counter of the loop context.
       Throws a compile error if a function or the global context is reached first,
       or if the block stack is exhausted. */
    fun check_inside_loop(is_break: bool, loc: loc_t)
    {
        /* the statement name; used in the error messages */
        val bc_str = if is_break {"break"} else {"continue"}
        /* scans the block stack starting from the innermost context */
        fun check_inside_loop_(s: block_ctx_t ref list)
        {
            | top :: rest =>
                val {bctx_kind, bctx_for_flags} = *top
                match bctx_kind {
                | BlockKind_Loop | BlockKind_LoopND =>
                    /* a loop context with the 'nested' flag is skipped,
                       the search continues in the outer contexts */
                    if bctx_for_flags.for_flag_nested { check_inside_loop_(rest) }
                    else if is_break { top->bctx_break_used += 1 }
                    else { top->bctx_continue_used += 1 }
                | BlockKind_Fun _ | BlockKind_Global =>
                    throw compile_err(loc, f"'{bc_str}' is used outside of a loop")
                | _ => check_inside_loop_(rest)
                }
            | _ => throw compile_err(loc, f"'{bc_str}' is used outside of a loop")
        }
        check_inside_loop_(block_stack)
    }

    /* Returns the innermost block context (the head of the block stack).
       Throws a compile error if the stack is empty. */
    fun curr_block_ctx(loc: loc_t)
    {
        match block_stack {
        | innermost :: _ => innermost
        | [] => throw compile_err(loc, "cgen: empty block stack!")
        }
    }

    /* Returns the id of the function that owns the first BlockKind_Fun context
       found in the block stack (the search starts from the innermost block),
       or noid if there is no such context. 'loc' is not used. */
    fun curr_func(loc: loc_t) =
        match find_opt(for bctx <- block_stack {
                        match bctx->bctx_kind {
                        | BlockKind_Fun _ => true
                        | _ => false
                        }}) {
        | Some(ref {bctx_kind=BlockKind_Fun f}) => f
        | _ => noid
        }

    /* Creates a fresh block context of the specified kind and pushes it onto the block stack.
       The context label is named "cleanup" for the global and function blocks and
       "catch" for all the other blocks. BlockKind_LoopND blocks additionally get
       a "break" label. The status expressions are dummies, the prologue and cleanup
       lists are empty and all the usage counters are zero. */
    fun new_block_ctx_(kind: block_kind_t, for_flags: for_flags_t, loc: loc_t)
    {
        val label_base =
            match kind {
            | BlockKind_Fun _ | BlockKind_Global => "cleanup"
            | _ => "catch"
            }
        /* the block label is always created before the optional break label */
        val block_label = make_label(label_base, get_end_loc(loc))
        val break_label =
            if kind == BlockKind_LoopND { make_label("break", get_end_loc(loc)) }
            else { noid }
        val fresh_ctx = ref (block_ctx_t {
            bctx_kind=kind,
            bctx_label=block_label,
            bctx_br_label=break_label,
            bctx_for_flags=for_flags,
            bctx_status=make_dummy_exp(loc),
            bctx_par_status=make_dummy_exp(loc),
            bctx_prologue=[],
            bctx_cleanup=[],
            bctx_break_used=0,
            bctx_continue_used=0,
            bctx_return_used=0,
            bctx_label_used=0
        })
        block_stack = fresh_ctx :: block_stack
    }

    /* Pushes a new block context of the specified kind with the default loop flags. */
    fun new_block_ctx(kind: block_kind_t, loc: loc_t)
    {
        new_block_ctx_(kind, default_for_flags(), loc)
    }

    /* Pushes a new loop context: BlockKind_Loop when ndims == 1, BlockKind_LoopND otherwise.
       Then the status expressions of the new context are set to 'status' and 'par_status'. */
    fun new_for_block_ctx(ndims: int, for_flags: for_flags_t, status: cexp_t, par_status: cexp_t, loc: loc_t)
    {
        val loop_kind = if ndims != 1 { BlockKind_LoopND } else { BlockKind_Loop }
        new_block_ctx_(loop_kind, for_flags, loc)
        val loop_ctx = curr_block_ctx(loc)
        loop_ctx->bctx_status = status
        loop_ctx->bctx_par_status = par_status
    }

    /* Removes the innermost block context from the block stack.
       Throws a compile error if the stack is empty. */
    fun pop_block_ctx(loc: loc_t)
    {
        match block_stack {
        | _ :: outer_blocks => block_stack = outer_blocks
        | [] => throw compile_err(loc, "cgen: empty block stack!")
        }
    }

    /* Returns the expression that refers to the label of the innermost block
       and marks this label as used (increments its use counter). */
    fun curr_block_label(loc: loc_t)
    {
        val innermost = curr_block_ctx(loc)
        innermost->bctx_label_used += 1
        val label_id = innermost->bctx_label
        make_id_exp(label_id, loc)
    }

    /* Returns the expression that refers to the label of the block enclosing the innermost one
       (the second entry of the block stack) and marks this label as used.
       Throws a compile error if the stack has fewer than 2 entries. */
    fun parent_block_label(loc: loc_t)
    {
        match block_stack {
        | _ :: enclosing :: _ =>
            enclosing->bctx_label_used += 1
            val label_id = enclosing->bctx_label
            make_id_exp(label_id, loc)
        | _ => throw compile_err(loc, "cgen internal err: there is no parent block!")
        }
    }

    /* Wraps 'call_exp' into FX_CALL(call_exp, lbl) and prepends the resulting
       statement to the (reversed) code list 'ccode'. Returns the updated list. */
    fun add_fx_call_(call_exp: cexp_t, ccode: ccode_t, lbl: cexp_t, loc: loc_t) =
        CExp(make_call(std_FX_CALL, [:: call_exp, lbl], CTypVoid, loc)) :: ccode

    /* The same as add_fx_call_(), but the label of the innermost block
       is used (and marked as used). */
    fun add_fx_call(call_exp: cexp_t, ccode: ccode_t, loc: loc_t)
    {
        val block_label_exp = curr_block_label(loc)
        add_fx_call_(call_exp, ccode, block_label_exp, loc)
    }

    /* Declares the local C variable 'i' of type 'ctyp', optionally initialized with 'e0_opt'.
       Returns the expression that refers to the variable and the updated code list.

       If the type needs a destructor (it has a free macro or a free function):
       - the declaration goes to the prologue of the current block; the variable is initialized
         there with the null pointer (pointer types) or with the empty initializer;
       - the code that frees the variable is added to the cleanup section of the current block;
       - the actual initialization with e0, if any, is done by the copy code added to 'ccode'.
       Otherwise the variable is simply declared in 'ccode' with 'e0_opt' as the initializer. */
    fun add_local(i: id_t, ctyp: ctyp_t, flags: val_flags_t,
                  e0_opt: cexp_t?, ccode: ccode_t, loc: loc_t): (cexp_t, ccode_t)
    {
        val {ctp_ptr, ctp_free=(freem, freef)} = C_gen_types.get_ctprops(ctyp, loc)
        val need_dtor = freem != noid || freef != noid
        if need_dtor {
            val bctx = curr_block_ctx(loc)
            val init_exp =
                if ctp_ptr { make_nullptr(loc) }
                else { CExpInit([], (ctyp, loc)) }
            val (i_exp, prologue) = create_cdefval(i, ctyp, flags, "", Some(init_exp), bctx->bctx_prologue, loc)
            bctx->bctx_prologue = prologue
            bctx->bctx_cleanup = C_gen_types.gen_free_code(i_exp, ctyp, true, true, bctx->bctx_cleanup, loc)
            (i_exp,
            match (e0_opt, ctp_ptr) {
            /* a pointer has been set to null in the prologue already, no need to copy 'nil' there */
            | (Some(CExpLit (KLitNil _, _)), true) => ccode
            | (Some e0, _) => C_gen_types.gen_copy_code(e0, i_exp, ctyp, ccode, loc)
            | _ => ccode
            })
        } else {
            create_cdefval(i, ctyp, flags, "", e0_opt, ccode, loc)
        }
    }

    /* Declares the local C variable 'i' that refers to the existing value 'e0'.
       - If 'ctyp' is a pointer type (ctp_ptr) and the value is immutable,
         'i' has type 'ctyp' and is initialized with 'e0' itself.
       - Otherwise 'i' is declared as a pointer to 'ctyp', initialized with the address of 'e0',
         and the map i2e gets the entry i -> (*i).
       Returns the result of create_cdefval(): the variable expression and the updated code list. */
    fun add_local_tempref(i: id_t, ctyp: ctyp_t, flags: val_flags_t,
                          e0: cexp_t, ccode: ccode_t, loc: loc_t): (cexp_t, ccode_t)
    {
        val {ctp_ptr} = C_gen_types.get_ctprops(ctyp, loc)
        val (e, ctyp) =
            if ctp_ptr && !flags.val_flag_mutable {
                (e0, ctyp)
            } else {
                val ctyp_ptr = make_ptr(ctyp)
                val deref_i_exp = CExpUnary(COpDeref, make_id_t_exp(i, ctyp_ptr, loc), (ctyp, loc))
                i2e.add(i, deref_i_exp)
                (cexp_get_addr(e0), ctyp_ptr)
            }
        create_cdefval(i, ctyp, flags, "", Some(e), ccode, loc)
    }

    /* For a value with the 'tempref' flag returns (true, address of 'e', raw pointer to 'ctyp');
       for all the other values returns (false, e, ctyp), i.e. the input is left as is. */
    fun handle_temp_ref(flags: val_flags_t, e: cexp_t, ctyp: ctyp_t)
    {
        if !flags.val_flag_tempref {
            (false, e, ctyp)
        } else {
            (true, cexp_get_addr(e), CTypRawPtr([], ctyp))
        }
    }

    /* Returns the destination expression for a result of type 'ctyp' and the updated code list:
       - for CTypVoid a dummy expression is returned, nothing is stored;
       - if '*dstexp_r' contains an expression already, it is returned;
       - otherwise a new temporary local (its name is generated from 'prefix') is
         declared using add_local() and stored in '*dstexp_r'. */
    fun get_dstexp(dstexp_r: cexp_t? ref, prefix: string, ctyp: ctyp_t, ccode: ccode_t, loc: loc_t) =
        match (ctyp, *dstexp_r) {
        | (CTypVoid, _) => (make_dummy_exp(loc), ccode)
        | (_, Some dst_exp) => (dst_exp, ccode)
        | _ =>
            val i = gen_idc(cm_idx, prefix)
            val (i_exp, ccode) = add_local(i, ctyp, default_tempval_flags(), None, ccode, loc)
            *dstexp_r = Some(i_exp)
            (i_exp, ccode)
        }

    /* Treats 'cexp' as a structure (possibly accessed via pointers) and returns the tuple
       (structure name id, expression of the structure type, structure fields, ct_data_start).
       For an anonymous CTypStruct the last element is 0 and the name is noid if it is absent.
       Throws a compile error if the type of 'cexp' is not a structure. */
    fun get_struct(cexp: cexp_t)
    {
        val (ctyp, cloc) = get_cexp_ctx(cexp)
        fun try_deref(cexp: cexp_t, ctyp: ctyp_t) =
            match ctyp {
            /* strip the raw pointer layers, dereferencing the expression on each step */
            | CTypRawPtr (_, ctyp1) => try_deref(cexp_deref(cexp), ctyp1)
            | _ =>
                match ctyp {
                | CTypName tn =>
                    match cinfo_(tn, cloc) {
                    | CTyp (ref {ct_typ=CTypStruct (_, relems), ct_data_start}) => (tn, cexp, relems, ct_data_start)
                    /* a named type that is a pointer to a structure: one more dereference is needed */
                    | CTyp (ref {ct_typ=CTypRawPtr (_, CTypStruct (rn, relems)), ct_data_start}) =>
                        (rn.value_or(noid), cexp_deref(cexp), relems, ct_data_start)
                    | _ => throw compile_err(cloc, f"the type '{get_idc_cname(tn, cloc)}' is not a structure")
                    }
                | CTypStruct (rn, relems) => (rn.value_or(noid), cexp, relems, 0)
                | _ => throw compile_err(cloc, "a structure is expected here")
                }
            }
        try_deref(cexp, ctyp)
    }

    /* Splits the list of the C function parameters into the "real" arguments and the service ones.
       Returns the tuple (real_args, ret_id, ret_rt, have_fv_arg):
       - real_args: 'args' in the original order without the trailing CArgFV parameter and
         without the CArgRetVal parameter that precedes it (if they are present);
       - ret_id, ret_rt: the id and the type of the CArgRetVal parameter; if there is no such
         parameter, ret_id is noid and ret_rt is 'rt' for "nothrow" functions, CTypVoid otherwise;
       - have_fv_arg: true if the last parameter has the CArgFV attribute. */
    fun unpack_fun_args(args: (id_t, ctyp_t, carg_attr_t list) list, rt: ctyp_t, is_nothrow: bool) {
        /* exclude "fx_fv" from real args if any */
        val (real_args, have_fv_arg) =
        match args.rev() {
        | (_, _, flags) :: rest when flags.mem(CArgFV) => (rest, true)
        | rargs => (rargs, false)
        }
        /* extract and exclude fx_result, if any */
        val (real_args, ret_id, ret_rt) =
        match real_args {
        | ((i, t, flags) :: rest) when flags.mem(CArgRetVal) => (rest, i, t)
        | _ => (real_args, noid, if is_nothrow { rt } else { CTypVoid })
        }
        /* restore the original order of the arguments */
        val real_args = real_args.rev()
        (real_args, ret_id, ret_rt, have_fv_arg)
    }

    /* Generates the statement FX_BREAK(<label of the innermost block>).
       Before that it is checked that 'break' is used inside a loop
       (see check_inside_loop), otherwise a compile error is thrown. */
    fun make_break_stmt(loc: loc_t)
    {
        check_inside_loop(true, loc)
        val target_label = curr_block_label(loc)
        val break_call = make_call(std_FX_BREAK, [:: target_label], CTypVoid, loc)
        CExp(break_call)
    }

    /* Generates the statement FX_CONTINUE(<label of the innermost block>).
       Before that it is checked that 'continue' is used inside a loop
       (see check_inside_loop), otherwise a compile error is thrown. */
    fun make_continue_stmt(loc: loc_t)
    {
        check_inside_loop(false, loc)
        val target_label = curr_block_label(loc)
        val continue_call = make_call(std_FX_CONTINUE, [:: target_label], CTypVoid, loc)
        CExp(continue_call)
    }

    /* Constructs the C 'if' statement. When the 'then' branch is empty (CStmtNop or an empty block),
       the branches are swapped and the condition is inverted, so that the generated
       statement has the non-empty 'then' branch and CStmtNop as the 'else' branch:
       - a comparison is replaced with the opposite comparison;
       - '!a' is replaced with 'a';
       - any other condition 'c' is replaced with '!c'.
       NOTE(review): replacing e.g. 'a < b' with 'a >= b' is not an equivalent negation for
       floating-point operands when one of them is NaN; confirm that it is acceptable here. */
    fun make_if(cc: cexp_t, then_s: cstmt_t, else_s: cstmt_t, loc: loc_t) =
        match then_s {
        | CStmtNop _ | CStmtBlock([], _) =>
            val then_loc = get_cstmt_loc(then_s)
            val inv_cc = match cc {
                | CExpBinary(COpCmp(cmp), a, b, ctx) =>
                    val inv_cmp = match cmp {
                        | CmpEQ => CmpNE | CmpNE => CmpEQ
                        | CmpLT => CmpGE | CmpLE => CmpGT
                        | CmpGT => CmpLE | CmpGE => CmpLT }
                    CExpBinary(COpCmp(inv_cmp), a, b, ctx)
                | CExpUnary(COpLogicNot, a, ctx) => a
                | _ =>
                    val cc_ctx = get_cexp_ctx(cc)
                    CExpUnary(COpLogicNot, cc, cc_ctx)
            }
            CStmtIf(inv_cc, else_s, CStmtNop(then_loc), loc)
        | _ => CStmtIf(cc, then_s, else_s, loc)
        }

    /* Converts the id 'i' to a C expression; returns the expression and the updated code list.
       - If 'i' is in the map i2e, the stored expression is returned. However, when 'save' is true
         and the stored expression is not an identifier, a literal or a dereferenced identifier,
         the expression is first saved into a new local (a duplicate of 'i'), the map entry
         is replaced with that local, and the local (dereferenced, if 'i' is a temporary reference)
         is returned.
       - Otherwise the identifier expression is returned; global values and constructors
         are registered using ensure_sym_is_defined_or_declared(). */
    fun id2cexp(i: id_t, save: bool, ccode: ccode_t, loc: loc_t) =
        match i2e.find_opt(i) {
        | Some e =>
            match (save, e) {
            | (false, _) | (_, CExpIdent _) | (_, CExpLit _) | (_, CExpUnary (COpDeref, CExpIdent _, _)) => (e, ccode)
            | _ =>
                val (ctyp, _) = get_cexp_ctx(e)
                val {kv_flags} = get_kval(i, loc)
                val i2 = dup_idc(cm_idx, i)
                /* for a temporary reference the address of the expression is stored */
                val (add_deref, e, ctyp) = handle_temp_ref(kv_flags, e, ctyp)
                val (i2_exp, ccode) = add_local(i2, ctyp, kv_flags, Some(e), ccode, loc)
                i2e.add(i, i2_exp)
                (if add_deref { cexp_deref(i2_exp) } else { i2_exp }, ccode)
            }
        | _ =>
            val e = make_id_exp(i, loc)
            val e =
                match cinfo_(i, loc) {
                | CVal ({cv_typ, cv_flags}) =>
                    if is_val_global(cv_flags) || cv_flags.val_flag_ctor > 0 {
                        ensure_sym_is_defined_or_declared(i, loc)
                    }
                    match cv_typ {
                    /* NOTE(review): the guard compares the pointee type with the pointer type itself,
                       so this case looks unreachable; confirm the intent */
                    | CTypRawPtr (_, ctyp2) when ctyp2 == cv_typ => cexp_deref(e)
                    | _ => e
                    }
                | _ => e
                }
            (e, ccode)
        }

    /* Returns true if 'e' is a literal or an identifier of a scalar type
       that denotes an immutable value (a CVal without the 'mutable' flag).
       Returns false for all the other expressions. */
    fun is_immutable_atomic_cexp(e: cexp_t)
    {
        | CExpLit _ => true
        | CExpIdent (i, (t, loc)) =>
            val {ctp_scalar} = C_gen_types.get_ctprops(t, loc)
            ctp_scalar && (match cinfo_(i, loc) {
                          | CVal ({cv_flags}) => !cv_flags.val_flag_mutable
                          | _ => false
                          })
        | _ => false
    }

    /*
       Completes the translation of the body of the innermost loop and pops its block context.

       body_code is the reversed list of the loop body statements.
       enable_break_continue=false turns the use of break/continue inside the body
       into a compile error (the error message refers to comprehensions).

       The final body consists of the block prologue, the body itself and the epilogue.
       The epilogue always includes the block cleanup code. If the block label,
       'break' or 'continue' have been used, the epilogue also includes the block label
       and the checks for 'continue', 'break' and exceptions: FX_CHECK_EXN() with the label of
       the parent block or, for parallel loops, FX_CHECK_EXN_PARALLEL() with the loop status expressions.

       Returns (br_label, body_stmt), where br_label is the "break" label of the loop if
       'break' has been used and the loop has such a label (BlockKind_LoopND), noid otherwise.

       Throws a compile error if the current context is not a loop, or if
       'break' or 'continue' is used inside a parallel loop. The check for 'break' is
       done for all the parallel loops, not just for the loops with a separate "break" label,
       which is consistent with the check for 'continue'.
    */
    fun finalize_loop_body(body_code: ccode_t, enable_break_continue: bool, loc: loc_t)
    {
        val end_loc = get_end_loc(loc)
        val bctx = curr_block_ctx(loc)
        val {bctx_kind, bctx_prologue, bctx_cleanup, bctx_status, bctx_par_status,
            bctx_break_used, bctx_continue_used, bctx_for_flags, bctx_label,
            bctx_br_label, bctx_label_used} = *bctx
        match bctx_kind {
        | BlockKind_Loop | BlockKind_LoopND => {}
        | _ => throw compile_err(loc, "cgen: the current context is not a loop")
        }
        /* all the code lists here are reversed, hence the order of concatenation below */
        val epilogue = bctx_cleanup.rev()
        val is_parallel = bctx_for_flags.for_flag_parallel
        val (br_label, epilogue) =
        if bctx_label_used + bctx_break_used + bctx_continue_used == 0 {
            /* nothing jumps to the block label, so just the cleanup code is needed */
            (noid, epilogue)
        } else {
            val parent_label_exp = parent_block_label(end_loc)
            val epilogue = epilogue + [:: CStmtLabel(bctx_label, end_loc)]
            if !enable_break_continue && bctx_continue_used + bctx_break_used != 0 {
                throw compile_err(loc, "cgen: cannot use break/continue inside comprehensions")
            }
            val continue_code =
                if bctx_continue_used == 0 { [] }
                else if !is_parallel {
                    [:: CExp(make_call(std_FX_CHECK_CONTINUE, [], CTypVoid, end_loc))]
                } else {
                    throw compile_err(loc, "cgen: 'continue' may not be used inside parallel for")
                }
            val (br_label, break_code) =
                if bctx_break_used == 0 {
                    (noid, [])
                } else if is_parallel {
                    /* reject 'break' in any parallel loop before choosing the kind of the check */
                    throw compile_err(loc, "cgen: 'break' may not be used inside parallel for")
                } else if bctx_br_label == noid {
                    (noid, [:: CExp(make_call(std_FX_CHECK_BREAK, [], CTypVoid, end_loc))])
                } else {
                    val br_label_exp = make_id_exp(bctx_br_label, end_loc)
                    (bctx_br_label, [:: CExp(make_call(std_FX_CHECK_BREAK_ND, [:: br_label_exp], CTypVoid, end_loc))])
                }
            val check_exn_code =
                if !is_parallel {
                    [:: CExp(make_call(std_FX_CHECK_EXN, [:: parent_label_exp], CTypVoid, end_loc))]
                } else {
                    [:: CExp(make_call(get_id("FX_CHECK_EXN_PARALLEL"), [:: bctx_status, bctx_par_status], CTypVoid, end_loc))]
                }
            (br_label, check_exn_code + (break_code + (continue_code + epilogue)))
        }
        val body_code = epilogue + (body_code + bctx_prologue)
        val body_stmt = rccode2stmt(body_code, loc)
        pop_block_ctx(end_loc)
        (br_label, body_stmt)
    }

    /* Converts the K-form atom 'a' to a C expression; returns the expression and the updated code list.
       - String literals are stored in a new local initialized with FX_MAKE_STR(<literal>).
       - The 'nil' literal becomes the null pointer for lists and pointers; for vectors and arrays
         a new local is declared and the call that makes it empty is added to the code;
         for all the other types a compile error is thrown.
       - All the other literals are converted to the literal expressions.
       - Identifiers are converted using id2cexp(), 'save' is passed there. */
    fun atom2cexp_(a: atom_t, save: bool, ccode: ccode_t, loc: loc_t) =
        match a {
        | AtomLit l =>
            match l {
            | KLitString _ =>
                /* since FX_MAKE_STR(<string_literal>) creates a string with NULL reference counter and
                without allocating string in memory heap, there is no need to call destructor for it */
                val e0 = make_call(std_FX_MAKE_STR, [:: make_lit_exp(l, loc)], CTypString, loc)
                create_cdefval(gen_idc(cm_idx, "slit"), CTypString, default_tempval_flags(), "", Some(e0), ccode, loc)
            | KLitNil ktyp =>
                match deref_ktyp(ktyp, loc) {
                | KTypList _ | KTypCPointer | KTypRawPointer _ =>
                    (make_nullptr(loc), ccode)
                | KTypVector et =>
                    /* the empty vector: declare the local and call fx_rrb_make_empty() for it */
                    val elem_ctyp = C_gen_types.ktyp2ctyp(et, loc)
                    val (elemsize_exp, free_f_exp, copy_f_exp) = get_elem_size_free_copy(elem_ctyp, loc)
                    val (vec_exp, ccode) = create_cdefval(gen_idc(cm_idx, "zvec"), CTypVector(elem_ctyp),
                        default_tempval_flags(), "", None, ccode, loc)
                    val call_make_empty = make_call(get_id("fx_rrb_make_empty"),
                        [:: elemsize_exp, free_f_exp, copy_f_exp, cexp_get_addr(vec_exp) ],
                        CTypVoid, loc)
                    (vec_exp, CExp(call_make_empty) :: ccode)
                | KTypArray(ndims, et) =>
                    /* the empty array: declare the local and call fx_make_arr() with
                       null pointers passed instead of the shape and the data */
                    val elem_ctyp = C_gen_types.ktyp2ctyp(et, loc)
                    val (elemsize_exp, free_f_exp, copy_f_exp) = get_elem_size_free_copy(elem_ctyp, loc)
                    val (arr_exp, ccode) = create_cdefval(gen_idc(cm_idx, "zarr"), CTypArray(ndims, elem_ctyp),
                        default_tempval_flags(), "", None, ccode, loc)
                    val call_make_empty = make_call(get_id("fx_make_arr"),
                        [:: make_int_exp(ndims, loc), make_nullptr(loc),
                        elemsize_exp, free_f_exp, copy_f_exp,
                        make_nullptr(loc), cexp_get_addr(arr_exp)],
                        CTypVoid, loc)
                    (arr_exp, CExp(call_make_empty) :: ccode)
                | _ =>
                    throw compile_err(loc, f"cgen: unsupported type '{ktyp}' of '[]' literal")
                }
            | _ =>
                val e = make_lit_exp(l, loc)
                (e, ccode)
            }
        | AtomId i => id2cexp(i, save, ccode, loc)
        }

    /* Converts the atom 'a' to a C expression without forcing
       the result to be saved (atom2cexp_ with save=false). */
    fun atom2cexp(a: atom_t, ccode: ccode_t, loc: loc_t)
    {
        atom2cexp_(a, false, ccode, loc)
    }

    /* Replaces the empty initializer expression with the integer 0 when
       the K-form type 'ktyp' is a scalar or a pointer type.
       All the other expressions are returned as is. */
    fun fix_nil(e: cexp_t, ktyp: ktyp_t)
    {
        match e {
        | CExpInit ([], _) =>
            val e_loc = get_cexp_loc(e)
            val {ktp_scalar, ktp_ptr} = K_annotate.get_ktprops(ktyp, e_loc)
            if ktp_scalar || ktp_ptr { make_int_exp(0, e_loc) } else { e }
        | _ => e
        }
    }

    /* Declares the constant raw C array 'arr_id' with elements of type 'elem_ctyp',
       initialized with 'arr_data'. Returns the array expression and the updated code list.
       If 'arr_data' is empty, nothing is declared and the null pointer is returned. */
    fun decl_plain_arr(arr_id: id_t, elem_ctyp: ctyp_t, arr_data: cexp_t list, ccode: ccode_t, loc: loc_t)
    {
        match arr_data {
        | [] => (make_nullptr(loc), ccode)
        | _ =>
            val const_arr_ctyp = CTypRawArray([:: CTypConst], elem_ctyp)
            val initializer = CExpInit(arr_data, (const_arr_ctyp, loc))
            create_cdefval(arr_id, const_arr_ctyp, default_tempval_flags(), "",
                           Some(initializer), ccode, loc)
        }
    }

    /* Returns the triple of C expressions that describe an element of type 'elem_ctyp':
       (sizeof(elem_ctyp), the destructor cast to fx_free_t, the copy function cast to fx_copy_t).
       The null pointer is returned instead of the destructor and/or the copy function
       if the type does not have them. */
    fun get_elem_size_free_copy(elem_ctyp: ctyp_t, loc: loc_t)
    {
        val elem_size = make_call(std_sizeof, [:: CExpTyp(elem_ctyp, loc)], CTypSize_t, loc)
        val (_, free_f_opt) = C_gen_types.get_free_f(elem_ctyp, true, false, loc)
        val free_fn =
            match free_f_opt {
            | Some free_f => CExpCast(free_f, std_fx_free_t, loc)
            | _ => make_nullptr(loc)
            }
        val (_, copy_f_opt) = C_gen_types.get_copy_f(elem_ctyp, true, false, loc)
        val copy_fn =
            match copy_f_opt {
            | Some copy_f => CExpCast(copy_f, std_fx_copy_t, loc)
            | _ => make_nullptr(loc)
            }
        (elem_size, free_fn, copy_fn)
    }

    /* Generates the code that constructs the array 'arr_exp' of the specified shape
       using fx_make_arr(); 'data' is the optional list of the initial elements.
       The call is wrapped into FX_CALL() with the label 'lbl'. The temporary shape and data arrays
       together with the call form a separate statement that is prepended to 'ccode0'.
       Throws a compile error if 'arr_exp' is not an array or if the number of
       the shape elements is different from the array dimensionality. */
    fun make_make_arr_call(arr_exp: cexp_t, shape: cexp_t list, data: cexp_t list,
                            ccode0: ccode_t, lbl: cexp_t, loc: loc_t)
    {
        val arr_ctyp = get_cexp_typ(arr_exp)
        val dims = shape.length()
        val shape_ctyp = CTypRawArray([:: CTypConst], CTypInt)
        val shape_arr = CExpInit(shape, (shape_ctyp, loc))
        /* the local code list starts from scratch here, 'ccode0' is extended in the very end */
        val (shape_exp, ccode) = create_cdefval(gen_idc(cm_idx, "shape"), shape_ctyp, default_tempval_flags(), "", Some(shape_arr), [], loc)
        val elem_ctyp =
            match arr_ctyp {
            | CTypArray (dims0, elem_ctyp) =>
                if dims0 != dims {
                    throw compile_err(loc, f"cgen: incorrect number of shape elements (actual: {dims}, expected: {dims0})")
                }
                elem_ctyp
            | _ => throw compile_err(loc, "cgen: invalid output type of array construction expression")
            }
        /* for the empty 'data' the null pointer is passed to fx_make_arr() */
        val (data_exp, ccode) = decl_plain_arr(gen_idc(cm_idx, "data"), elem_ctyp, data, ccode, loc)
        val (sizeof_elem_exp, free_f_exp, copy_f_exp) = get_elem_size_free_copy(elem_ctyp, loc)

        val call_mkarr = make_call( std_fx_make_arr, [:: make_int_exp(dims, loc), shape_exp,
                                    sizeof_elem_exp, free_f_exp, copy_f_exp, data_exp,
                                    cexp_get_addr(arr_exp)], CTypCInt, loc)
        val ccode = add_fx_call_(call_mkarr, ccode, lbl, loc)
        rccode2stmt(ccode, loc) :: ccode0
    }

    /* Generates the code that constructs the vector 'vec_exp' from the list of elements 'data'
       using fx_make_vec(). The call is wrapped into FX_CALL() with the label 'lbl'.
       The temporary data array (if any) together with the call form a separate statement
       that is prepended to 'ccode0'. For the empty 'data' no array is declared
       and the null pointer is passed to fx_make_vec().
       Throws a compile error if 'vec_exp' is not a vector. */
    fun make_make_vec_call(vec_exp: cexp_t, data: cexp_t list,
                            ccode0: ccode_t, lbl: cexp_t, loc: loc_t)
    {
        val elem_ctyp =
            match get_cexp_typ(vec_exp) {
            | CTypVector et => et
            | _ => throw compile_err(loc, "cgen: invalid output type of vector construction expression")
            }
        /* the local code list starts from scratch, 'ccode0' is extended in the very end */
        val (data_exp, local_code) =
            if data != [] {
                decl_plain_arr(gen_idc(cm_idx, "data"), elem_ctyp, data, [], loc)
            } else {
                (make_nullptr(loc), [])
            }
        val nelems_exp = make_int_exp(data.length(), loc)
        val (sizeof_elem_exp, free_f_exp, copy_f_exp) = get_elem_size_free_copy(elem_ctyp, loc)
        val mkvec_args = [:: nelems_exp, sizeof_elem_exp, free_f_exp, copy_f_exp,
                          data_exp, cexp_get_addr(vec_exp)]
        val call_mkvec = make_call(std_fx_make_vec, mkvec_args, CTypCInt, loc)
        val local_code = add_fx_call_(call_mkvec, local_code, lbl, loc)
        rccode2stmt(local_code, loc) :: ccode0
    }

    /*
        Prepares expression `e` to be passed as a function argument:
        if the properties of its C type say "pass by reference",
        the address of the expression is taken; otherwise it is passed as-is.
    */
    fun make_fun_arg(e: cexp_t, loc: loc_t)
    {
        val props = C_gen_types.get_ctprops(get_cexp_typ(e), loc)
        if !props.ctp_pass_by_ref { e } else { cexp_get_addr(e) }
    }

    /*
        Returns the pair (expression, type) that should be used to access
        a function parameter inside the function body.

        If the parameter carries CArgPassByPtr flag, its type must be a raw pointer;
        then the dereferenced argument and the pointee type are returned.
        Parameters without this flag are returned unchanged.
        idx is used only in the error message.
    */
    fun maybe_deref_fun_arg(idx: int, arg: cexp_t, t: ctyp_t, flags: carg_attr_t list, loc: loc_t) =
        if !flags.mem(CArgPassByPtr) { (arg, t) }
        else {
            match t {
            | CTypRawPtr (_, pointee_t) => (cexp_deref(arg), pointee_t)
            | _ =>
                throw compile_err(loc,
                    f"cgen: invalid type of argument #{idx}; it's passed by pointer, but the type is not pointer")
            }
        }

    /*
        Adds to `ccode` the call of the list constructor that builds
        lst_exp out of the head hd_exp and the tail tl_exp.
        The constructor is looked up by the type of lst_exp; addref_tl is
        forwarded to it as a boolean argument, and the address of lst_exp
        is passed as the last (output) argument.
    */
    fun make_cons_call(hd_exp: cexp_t, tl_exp: cexp_t, addref_tl: bool,
                       lst_exp: cexp_t, ccode: ccode_t, loc: loc_t)
    {
        val lst_ctyp = get_cexp_typ(lst_exp)
        val hd_arg = make_fun_arg(hd_exp, loc)
        val cons_f = C_gen_types.get_constructor(lst_ctyp, true, loc)
        val addref_arg = make_bool_exp(addref_tl, loc)
        val dst_arg = cexp_get_addr(lst_exp)
        val cons_call = make_call(cons_f, [:: hd_arg, tl_exp, addref_arg, dst_arg], CTypCInt, loc)
        add_fx_call(cons_call, ccode, loc)
    }

    /*
        Adds to `ccode` the call of the reference constructor that
        initializes r_exp with arg_exp. The constructor is looked up
        by the type of r_exp; the address of r_exp is the output argument.
    */
    fun make_mkref_call(arg_exp: cexp_t, r_exp: cexp_t, ccode: ccode_t, loc: loc_t)
    {
        val ref_ctyp = get_cexp_typ(r_exp)
        val init_arg = make_fun_arg(arg_exp, loc)
        val ref_con = C_gen_types.get_constructor(ref_ctyp, true, loc)
        val dst_arg = cexp_get_addr(r_exp)
        add_fx_call(make_call(ref_con, [:: init_arg, dst_arg], CTypCInt, loc), ccode, loc)
    }

    /*
        Joins all the boolean expressions from check_list with '&&' and puts
        FX_CHECK_EQ_SIZE(<joined condition>, lbl) on top of ccode.
        When check_list is empty, ccode is returned as-is.
    */
    fun add_size_eq_check(check_list: cexp_t list, ccode: ccode_t,
                          lbl: cexp_t, loc: loc_t) =
        match check_list {
        | [] => ccode
        | first :: others =>
            val all_equal =
                fold cond = first for c <- others {
                    CExpBinary(COpLogicAnd, cond, c, (CTypBool, loc))
                }
            CExp(make_call(std_FX_CHECK_EQ_SIZE, [:: all_equal, lbl ], CTypVoid, loc)) :: ccode
        }

    /*
        Adds the check that is run after a loop.

        All the expressions from check_list are summed up into a temporary
        integer `s`; then FX_CHECK_EQ_SIZE(s == 0 || s == n, lbl) is put on top of ccode,
        where n is the number of checks. That is, the check passes when
        either none or all of the conditions hold.
        When check_list is empty, ccode is returned as-is.
    */
    fun add_size_post_check(check_list: cexp_t list, ccode: ccode_t,
                            lbl: cexp_t, loc: loc_t) =
        match check_list {
        | [] => ccode
        | first :: others =>
            val nchecks = check_list.length()
            val total =
                fold acc = first for c <- others {
                    CExpBinary(COpAdd, acc, c, (CTypCInt, loc))
                }
            val (s_exp, ccode_s) =
                create_cdefval(gen_idc(cm_idx, "s"), CTypCInt, default_tempval_flags(),
                               "", Some(total), ccode, loc)
            val bctx = (CTypBool, loc)
            val none_hold = CExpBinary(COpCmp(CmpEQ), s_exp, make_int_exp(0, loc), bctx)
            val all_hold = CExpBinary(COpCmp(CmpEQ), s_exp, make_int_exp(nchecks, loc), bctx)
            val cond = CExpBinary(COpLogicOr, none_hold, all_hold, bctx)
            CExp(make_call(std_FX_CHECK_EQ_SIZE, [:: cond, lbl ], CTypVoid, loc)) :: ccode_s
        }

    /*
        Forms the text of a for-loop related error message.

        for_idx - 0-based position of the loop in the nest of `nfors` loops;
                  used to describe the loop as the outermost/innermost/N-th nested one
                  (nothing is added when there is just one loop).
        i       - 0-based index of the iteration clause; a negative value
                  means that the clause should not be mentioned.
        msg     - the actual error description.
    */
    fun for_err_msg(for_idx: int, nfors: int, i: int, msg: string)
    {
        /* clauses are reported using 1-based numbers */
        val clause_no = i + 1
        val clause_txt =
            if clause_no > 0 { f", {clause_no}-{String.num_suffix(clause_no)} iteration clause" }
            else { "" }
        val loop_txt =
            if nfors == 1 { "" }
            else if for_idx == 0 { "the outermost " }
            else if for_idx == nfors - 1 { "the innermost " }
            else { f"{for_idx}-{String.num_suffix(for_idx)} nested " }
        f"cgen: {loop_txt}for-loop{clause_txt}: {msg}"
    }

    /*
        Computes the dimensionality of a for-loop header, i.e. the common
        dimensionality of all the simultaneously iterated collections/ranges.

        for_idx, nfors - position of the loop in the nest and the nest depth;
                         used only to form the error message.
        idoml          - list of (iteration variable, iteration domain) pairs.

        An iterated identifier of array type contributes its number of dimensions,
        everything else counts as 1-dimensional.
        Returns 0 when idoml is empty.
        Throws a compile error if a clause has dimensionality different
        from the one of the preceding clause.
    */
    fun compute_for_ndims(for_idx: int, nfors: int, idoml: (id_t, dom_t) list, for_loc: loc_t) =
        fold ndims = 0 for (_, dom_i)@k <- idoml {
            val ndims_i =
                match dom_i {
                | DomainElem(AtomId d) =>
                    match get_idk_ktyp(d, for_loc) {
                    | KTypArray (n, _) => n
                    | _ => 1
                    }
                | _ => 1
                }
            /* ndims == 0 means that this is the first clause, nothing to compare with yet */
            if ndims != 0 && ndims != ndims_i {
                /* for_err_msg() expects (loop index, number of loops, clause index, message) */
                throw compile_err( for_loc, for_err_msg( for_idx, nfors, k,
                    f"dimensionalities of the simultaneously iterated collections/ranges are not the same (...{ndims}...{ndims_i}...)"))
            }
            ndims_i
        }

    /*
        Compute various elements/attributes/parts of the for loop:

        list_exps: lst0, lst1, ... - lists (if any) which are iterated.
        i_exps: i, j, k etc. - for-loop integer iteration variables (only for closed ranges and arrays, not for open ranges or lists)
        n_exps: n0, n1, n2 etc. - for-loop limits (also integers)
        for_checks: extra checks (besides `i < n0`, `j < n1`, `k < n2` etc.). Used for iteration over list(s): lst0[!=0], lst1[!=0] etc. ...
        incr_exps: extra increment operations (besides `i++`, `j++`, `k++`). Used for iteration over list(s): lst0=lst0->tl, lst1=lst1->tl, ...
        init_checks: the checks that we need to put before the loop into FX_CHECK_NE_SIZE((check0 || check1 || check2 ...), catch_label) macro.
            We check that all closed ranges and all simultaneously iterated 1D or nD arrays have the same shape.
            The lists sizes are not checked because we don't know their sizes before the loop,
                and we don't want to make extra loop to count their lengths.
        init_ccode: initial code for the loop:
            Save all the arrays, lists etc. if needed to guarantee that they are not destroyed
                in the middle of loop (and that they are computed just once).
            Save start:stop:step expressions in the ranges.
            Compute n_i=FX_LOOP_COUNT(start_i, stop_i, step_i) for each range.
            Save n0_i,n1_i,...=arr_i.dim[0,1,...].size.
            init_ccode does not include the check for size/shape equality/inequality (see init_checks). It's added after this let statement.
        pre_body_ccode: code before the inner-most loop. Now it's used only for arrays:
            elem_type0* ptr_arr0 = FX_PTR_<ndims>D(arr_0, i, j, ..., 0);
            elem_type1* ptr_arr1 = FX_PTR_<ndims>D(arr_1, i, j, ..., 0);
            that is, get the pointers to the linear slices of the iterated arrays.
            [TODO] we can check whether all the iterated arrays are continuous and modify n_exps accordingly,
                e.g. in 2D case with 2 arrays A and B:
                if(FX_ARR_CONTINUOUS(A) && FX_ARR_CONTINUOUS(B)) {
                    n1 *= n0;
                    n0 = 1;
                }
                with such a trick, the arrays are processed completely with a single run of inner loop with minimal overhead.
            note that in the case of 1D array processing pre_body_ccode immediately follows init_ccode and init_checks.
        body_pairs: a list of pairs (v_0, exp_0), (v_1, exp_1), ...
            This is a list of values that need to be extracted in the beginning of loop body.
            We keep them separately without forming expressions v_0 = exp_0, because in the case of complex types, e.g. strings or nested arrays,
            we need to initialize v_i separately and put the destructors to the for-loop body cleanup section. All this is done using add_local,
            but for that we need to form a nested for-loop body context, which we form after this let.
        post_checks: the checks that we need to put after the loop into FX_CHECK_NE_SIZE((check0 || check1 || check2 ...), catch_label) macro.
            We check that all closed ranges/arrays and all simultaneously iterated lists finished at once.
            In this let statement we put only lists. The range check is added later if needed.
            a) just list case:
            fx_list0_t lst0 = list0;
            fx_list1_t lst1 = list1;
            for(; lst0 && lst1; lst0=lst0->tl, lst1=lst1->tl) {
                int a=lst0->hd, b=lst1->hd;
                s += abs(a-b);
            }
            FX_CHECK_NE_SIZE(lst0 || lst1, catch_label); // check that both lists finished simultaneously.
            b) list and "closed range"/array case:
            fx_list0_t lst0 = list0;
            fx_list1_t lst1 = list1;
            for(int i = 0; i < n && lst0 && lst1; lst0=lst0->tl, lst1=lst1->tl) {
                int a=lst0->hd, b=lst1->hd;
                if(a!=b) {printf("diff=%d at %d\n", abs(a-b), i);}
            }
            FX_CHECK_NE_SIZE(i < n || lst0 || lst1, catch_label); // check that both lists and the range finished simultaneously.
    */
    /*
        Analyzes the header of a single for-loop (one level of a possibly nested loop)
        and produces the parts from which the C loop is assembled.

        lbl            - label passed to the generated size/step check macros.
        idoml          - (iteration variable, iteration domain) pairs of this loop.
        at_ids         - identifiers of the '@' indices, if any.
        for_idx, nfors - position of the loop in the nest and the nest depth (for error messages).
        ndims          - dimensionality of the loop; when it is <= 1, pre_body_ccode is merged into init_ccode.
        dims_ofs       - offset in for_letters used to name the generated index variables.
        nested_e_idoml - nested loops; an iterated collection is copied before the loop
                         if its id occurs in one of the expressions from this list.
        init_ccode     - the code generated so far; the loop initialization is put on top of it.

        Returns the tuple
        (for_headers, list_exps, i_exps, n_exps, init_ccode, pre_body_ccode, body_elems, post_ccode).
    */
    fun process_for(lbl: cexp_t, idoml: (id_t, dom_t) list, at_ids: id_t list,
                    for_idx: int, nfors: int, ndims: int, dims_ofs: int,
                    nested_e_idoml: (kexp_t, (id_t, dom_t) list, id_t list) list,
                    init_ccode: ccode_t, loc: loc_t)
    {
        val for_loc = get_start_loc(loc)
        val end_for_loc = get_end_loc(loc)
        if idoml == [] {
            throw compile_err(loc, for_err_msg(for_idx, nfors, -1, "empty list of for iteration values"))
        }
        /* if '@' index is requested, but none of the iteration domains is considered
           suitable to provide it, a dedicated range starting from 0 with step 1
           (its upper bound is _ALitVoid - presumably an open range, to be confirmed)
           is put in front of the clauses and bound to the single '@' id */
        val (idoml, at_ids) =
            if at_ids == [] {
                (idoml, at_ids)
            } else {
                val have_good_idx =
                    exists(for (_, dom_i) <- idoml {
                        match dom_i {
                        | DomainElem(AtomId k) =>
                            match deref_ktyp(get_idk_ktyp(k, loc), loc) {
                            | KTypList _ => false
                            | _ => true
                            }
                        | DomainFast(AtomId k) =>
                            match deref_ktyp(get_idk_ktyp(k, loc), loc) {
                            | KTypList _ => false
                            | _ => true
                            }
                        | DomainElem(AtomLit(KLitNil _)) => false
                        | DomainRange (_, _, AtomLit(KLitInt 1i64)) => false
                        | DomainRange (_, AtomLit(KLitNil _), _) => false
                        | _ => true
                        }})
                match (have_good_idx, at_ids) {
                | (true, _) => (idoml, at_ids)
                | (false, [:: i]) =>
                    val i_iter = (i, DomainRange(AtomLit(KLitInt(0i64)), _ALitVoid, AtomLit(KLitInt(1i64))))
                    (i_iter :: idoml, [])
                | _ => throw compile_err(loc, for_err_msg(for_idx, nfors, 0, "here @ clause should contain just one scalar index"))
                }
            }

        /* returns the id of k-th loop counter: the k-th '@' index when '@' indices
           are provided, or a freshly generated id with the specified prefix otherwise */
        fun get_iter_id(k: int, at_ids: id_t list, prefix: string) =
            match at_ids {
            | [] => gen_idc(cm_idx, prefix)
            | _ =>
                if k < at_ids.length() { at_ids.nth(k) }
                else { throw compile_err( loc, for_err_msg(for_idx, nfors, 0,
                    "the list of '@' indices is too short for array; looks like it's bug in type checker")) }
            }

        /* process the iteration clauses one by one; all the accumulated lists
           (except for i_exps and n_exps in the array case) are built in reverse order */
        val fold list_exps = ([]: cexp_t list), i_exps = ([]: cexp_t list),
            n_exps = ([]: cexp_t list), for_checks = [],
            incr_exps = [], init_checks = [], init_ccode = init_ccode, pre_body_ccode = [],
            body_elems = [], post_checks = [] for (iter_val_i, dom_i)@k <- idoml {

            val iter_val_i = if iter_val_i != noid { iter_val_i }
                              else { gen_idc(cm_idx, "i") }
            val (lists_i, i_exps, n_exps, for_checks, incr_exps, init_checks,
                init_ccode, pre_body_ccode, body_elems, post_checks):
                (cexp_t list, cexp_t list, cexp_t list, cexp_t list, cexp_t list,
                cexp_t list, ccode_t, ccode_t, (id_t, cexp_t, val_flags_t) list, cexp_t list) =
            match dom_i {
            | DomainRange (a, b, delta) =>
                /* literal zero step is rejected at compile time;
                   for a non-literal step the run-time check FX_CHECK_ZERO_STEP is added */
                val (aug_add_delta, add_delta, d_exp, init_ccode) =
                    match delta {
                    | AtomLit(KLitInt 0i64) =>
                        throw compile_err(for_loc, for_err_msg(for_idx, nfors, k, "the iteration step is zero"))
                    | AtomLit(KLitInt i) =>
                        (COpAugAdd, COpAdd, make_int__exp(i, for_loc), init_ccode)
                    | _ =>
                        val (d_exp, init_ccode) = atom2cexp_(delta, true, init_ccode, for_loc)
                        val init_ccode = CExp(make_call(std_FX_CHECK_ZERO_STEP, [:: d_exp, lbl ], CTypVoid, for_loc)) :: init_ccode
                        (COpAugAdd, COpAdd, d_exp, init_ccode)
                    }
                match b {
                | AtomLit(KLitNil _) =>
                    /*
                        int iter_var = a;
                        for(;;iter_var += delta) {
                            ...
                        }
                    */
                    val (a_exp, init_ccode) = atom2cexp_(a, false, init_ccode, for_loc)
                    val (i_exp, init_ccode) = create_cdefval(iter_val_i, CTypInt, default_tempvar_flags(), "", Some(a_exp), init_ccode, for_loc)
                    val incr_i_exp = CExpBinary(aug_add_delta, i_exp, d_exp, (CTypVoid, for_loc))
                    ([], i_exps, n_exps, for_checks, incr_i_exp :: incr_exps, init_checks, init_ccode, pre_body_ccode, body_elems, post_checks)
                | _ =>
                    /*
                        // save the loop counter
                        int n = FX_LOOP_COUNT(a, b, delta); // n === loop_counter
                        // or check it
                        FX_CHECK_NE_SIZE(FX_LOOP_COUNT(a, b, delta) != loop_counter, catch_label);
                        for(int i = 0; i < loop_counter; i++) {
                            int iter_var = a + i*delta; // compute the current value
                        }
                    */
                    val (a_exp, init_ccode) = atom2cexp_(a, true, init_ccode, for_loc)
                    val (b_exp, init_ccode) = atom2cexp_(b, true, init_ccode, for_loc)
                    /* "canonical" loop is 0:b with step 1; then the number of iterations is just b */
                    val is_canonical_for = match (a, delta) {
                                           | (AtomLit(KLitInt 0i64), AtomLit(KLitInt 1i64)) => true
                                           | _ => false
                                           }
                    val calc_n_exp = if is_canonical_for { b_exp }
                                     else { make_call(std_FX_LOOP_COUNT, [:: a_exp, b_exp, d_exp ], CTypInt, for_loc) }
                    /* if some preceding clause has already introduced the loop counter and the limit,
                       reuse them and just check that the number of iterations is the same */
                    val (add_elem, i_exp, i_exps, n_exps, init_checks, init_ccode) =
                        match (i_exps, n_exps) {
                        | (prev_i :: _, prev_n :: _) =>
                            (true, prev_i, i_exps, n_exps, CExpBinary(COpCmp(CmpEQ), prev_n, calc_n_exp, (CTypBool, for_loc)) :: init_checks, init_ccode)
                        | _ =>
                            /* in the canonical case the iteration variable itself is the loop counter,
                               so there is no need to compute it in the loop body */
                            val (add_pair, i_id) =
                            match (a, delta) {
                            | (AtomLit(KLitInt 0i64), AtomLit(KLitInt(1i64))) => (false, iter_val_i)
                            | _ => (true, get_iter_id(0, at_ids, pp(iter_val_i)))
                            }
                            val i_id = if i_id != noid { i_id }
                                       else { gen_idc(cm_idx, "i") }
                            val (n_exp, init_ccode) =
                            if is_canonical_for && is_immutable_atomic_cexp(b_exp) {
                                (b_exp, init_ccode)
                            } else {
                                add_local(gen_idc(cm_idx, "n"), CTypInt, default_tempval_flags(), Some(calc_n_exp), init_ccode, for_loc)
                            }
                            val (i_exp, _) = add_local(i_id, CTypInt, default_tempvar_flags(), None, [], for_loc)
                            (add_pair, i_exp, i_exp :: i_exps, n_exp :: n_exps, init_checks, init_ccode)
                        }
                    val body_elems =
                        if !add_elem { body_elems }
                        else {
                            val calc_i_exp = CExpBinary(add_delta, a_exp,
                                CExpBinary(COpMul, i_exp, d_exp, (CTypInt, for_loc)),
                                (CTypInt, for_loc))
                            (iter_val_i, calc_i_exp, default_tempvar_flags()) :: body_elems
                        }
                    ([], i_exps, n_exps, for_checks, incr_exps, init_checks, init_ccode, pre_body_ccode, body_elems, post_checks)
                }
            | DomainElem a =>
                val col_ = match a { | AtomId i => i | _ => noid }
                val ktyp = get_atom_ktyp(a, for_loc)
                val ctyp = C_gen_types.ktyp2ctyp(ktyp, for_loc)
                /* before running iteration over a collection,
                    we need to make sure that it will not be deallocated in the middle */
                val (col_exp, init_ccode) =
                if  col_ != noid &&
                    exists(for (e, _, _) <- nested_e_idoml { occurs_id_kexp(col_, e) }) {
                    val (src_exp, init_ccode) = atom2cexp(AtomId(col_), init_ccode, for_loc)
                    val (col_exp, init_ccode) = get_dstexp(ref None, pp(col_), ctyp, init_ccode, for_loc)
                    val init_ccode = C_gen_types.gen_copy_code(src_exp, col_exp, ctyp, init_ccode, for_loc)
                    (col_exp, init_ccode)
                } else {
                    atom2cexp_(a, true, init_ccode, for_loc)
                }
                match deref_ktyp(ktyp, for_loc) {
                | KTypList et =>
                    /*
                        some_lst_t lst = col;
                        for(; lst [&& ...]; lst=lst->tl) {
                            some_lst_elem_t x = lst->hd;
                            ...
                        }
                        // optional check
                        FX_CHECK_NE_SIZE(!lst || ..., catch_label);
                    */
                    val (l_exp, init_ccode) = create_cdefval(gen_idc(cm_idx, "lst"), ctyp, default_tempvar_flags(),
                                                             "", Some(col_exp), init_ccode, for_loc)
                    val not_l_exp = CExpUnary(COpLogicNot, l_exp, (CTypBool, end_for_loc))
                    val l_next_exp = make_assign(l_exp, cexp_arrow(l_exp, get_id("tl"), ctyp))
                    val c_et = C_gen_types.ktyp2ctyp(et, for_loc)
                    val get_hd_exp = cexp_arrow(l_exp, get_id("hd"), c_et)
                    val hd_flags = if is_ktyp_scalar(et) { default_tempvar_flags() }
                                   else { default_tempref_flags() }
                    ([:: l_exp], i_exps, n_exps, l_exp :: for_checks,
                    l_next_exp :: incr_exps, init_checks, init_ccode, pre_body_ccode,
                    (iter_val_i, get_hd_exp, hd_flags) :: body_elems, not_l_exp :: post_checks)
                | KTypString =>
                    /*
                        // either save the length
                        int_ len = str->length; // loop_counter === len
                        // or check it
                        FX_CHECK_NE_SIZE(str->length != loop_counter, catch_label);
                        for(int i = 0; i < loop_counter; i++) {
                            char_ x = str->data[i];
                            ...
                        }
                    */
                    val calc_n_exp = make_call(std_FX_STR_LENGTH, [:: col_exp], CTypInt, for_loc)
                    val (i_exp, i_exps, n_exps, init_checks, init_ccode) =
                    match (i_exps, n_exps) {
                    | (prev_i :: _, prev_n :: _) =>
                        (prev_i, i_exps, n_exps, CExpBinary(COpCmp(CmpEQ), prev_n, calc_n_exp,
                        (CTypBool, for_loc)) :: init_checks, init_ccode)
                    | _ =>
                        val (n_exp, init_ccode) = add_local(gen_idc(cm_idx, "len"), CTypInt,
                            default_tempval_flags(), Some(calc_n_exp), init_ccode, for_loc)
                        val i_id = get_iter_id(0, at_ids, for_letters.nth(dims_ofs))
                        val (i_exp, _) = add_local(i_id, CTypInt, default_tempvar_flags(), None, [], for_loc)
                        (i_exp, i_exp :: i_exps, n_exp :: n_exps, init_checks, init_ccode)
                    }
                    val get_chars = cexp_mem(col_exp, get_id("data"), make_const_ptr(CTypUniChar))
                    val get_char_i = CExpBinary(COpArrayElem, get_chars, i_exp, (CTypUniChar, for_loc))
                    ([], i_exps, n_exps, for_checks, incr_exps, init_checks, init_ccode, pre_body_ccode,
                    (iter_val_i, get_char_i, default_tempvar_flags()) :: body_elems, post_checks )
                | KTypArray (ndims, et) =>
                    /*
                        // either save all the dimensions
                        int_ ni = FX_ARR_SIZE(arr, 0);
                        int_ nj = FX_ARR_SIZE(arr, 1);
                        ...
                        // or check them
                        FX_CHECK_NE_SIZE(FX_ARR_SIZE(arr, 0) != ni || FX_ARR_SIZE(arr, 1) != nj ..., catch_label);
                        for(int i = 0; i < ni; i++) {
                            // before the inner-most loop
                            arr_elem_t* ptr = ...;
                            for(int j = 0; j < nj; j++) {
                                ...
                                ptr[j]
                                ...
                            }
                        }
                    */
                    val (i_exps, n_exps, init_checks, init_ccode) =
                    if n_exps == [] {
                        /* the first array defines the loop counters and the limits for all the dimensions */
                        val fold i_exps = [], n_exps = [], init_ccode = init_ccode for k <- 0:ndims {
                            val calc_n_exp = make_call(std_FX_ARR_SIZE, [:: col_exp, make_int_exp(k, for_loc) ], CTypInt, for_loc)
                            val iter_letter = for_letters.nth(k + dims_ofs)
                            val (n_exp, init_ccode) = add_local(gen_idc(cm_idx, "n" + iter_letter),
                                CTypInt, default_tempval_flags(), Some(calc_n_exp), init_ccode, for_loc)
                            val i_id = get_iter_id(k, at_ids, iter_letter)
                            val (i_exp, _) = add_local(i_id, CTypInt, default_tempvar_flags(), None, [], for_loc)
                            (i_exp :: i_exps, n_exp :: n_exps, init_ccode)
                        }
                        (i_exps.rev(), n_exps.rev(), init_checks, init_ccode)
                    } else {
                        /* the limits are already known; check that the array has the same size along each dimension */
                        val fold init_checks = init_checks for prev_nk@k <- n_exps {
                            val calc_n_exp = make_call(std_FX_ARR_SIZE, [:: col_exp, make_int_exp(k, for_loc) ], CTypInt, for_loc)
                            val init_check_k = CExpBinary(COpCmp(CmpEQ), prev_nk, calc_n_exp, (CTypBool, for_loc))
                            init_check_k :: init_checks
                        }
                        (i_exps, n_exps, init_checks, init_ccode)
                    }
                    val c_et = C_gen_types.ktyp2ctyp(et, for_loc)
                    val c_et_ptr = make_ptr(c_et)
                    /* the pointer to the current slice is computed using all the indices
                       except for the last one, which is replaced with 0;
                       the last index is then used to address elements within the slice */
                    val rev_i_exps = i_exps.rev()
                    val inner_idx = rev_i_exps.hd()
                    val slice_idxs = (make_int_exp(0, for_loc) :: rev_i_exps.tl()).rev()
                    val get_arr_slice = make_call(std_FX_PTR_xD.nth(ndims - 1), CExpTyp(c_et, for_loc) :: col_exp :: slice_idxs, c_et_ptr, for_loc)
                    val ptr_id = gen_idc(cm_idx, "ptr_" + pp(col_))
                    val (ptr_exp, pre_body_ccode) =
                    create_cdefval(ptr_id, c_et_ptr, default_tempval_flags(), "", Some(get_arr_slice), pre_body_ccode, for_loc)
                    val get_arr_elem = CExpBinary(COpArrayElem, ptr_exp, inner_idx, (c_et, for_loc))
                    ([], i_exps, n_exps, for_checks, incr_exps, init_checks, init_ccode, pre_body_ccode,
                    (iter_val_i, get_arr_elem, default_tempvar_flags()) :: body_elems, post_checks)
                | KTypVector (et) =>
                    /*
                        // either save the size or check it
                        int_ n = FX_RRB_SIZE(vec);
                        ...
                        // or check them
                        FX_CHECK_NE_SIZE(FX_RRB_SIZE(vec) != n, catch_label);

                        // then initialize the iterator
                        fx_rrbiter_t iter;
                        typ* ptr = (typ*)fx_rrb_start_read(vec, &iter, 0, 1);
                        for(int i = 0; i < n; i++) {
                            ...
                            *ptr
                            ...
                            FX_RRB_NEXT(iter, ptr)
                        }
                    */
                    val calc_n_exp = make_call(get_id("FX_RRB_SIZE"), [:: col_exp], CTypInt, for_loc)
                    val (i_exps, n_exps, init_checks, init_ccode) =
                    if n_exps == [] {
                        val (n_exp, init_ccode) = add_local(gen_idc(cm_idx, "n"),
                            CTypInt, default_tempval_flags(), Some(calc_n_exp), init_ccode, for_loc)
                        val i_id = get_iter_id(0, at_ids, for_letters.nth(dims_ofs))
                        val (i_exp, _) = add_local(i_id, CTypInt,
                            default_tempvar_flags(), None, [], for_loc)
                        (i_exp :: i_exps, n_exp :: n_exps, init_checks, init_ccode)
                    } else {
                        val prev_n = n_exps.hd()
                        val init_check = CExpBinary(COpCmp(CmpEQ), prev_n, calc_n_exp, (CTypBool, for_loc))
                        (i_exps, n_exps, init_check :: init_checks, init_ccode)
                    }
                    val c_et = C_gen_types.ktyp2ctyp(et, for_loc)
                    val c_et_ptr = make_ptr(c_et)
                    val colname = pp(col_)
                    val iter_id = gen_idc(cm_idx, "iter_" + colname)
                    val c_et_exp = CExpTyp(c_et, for_loc)
                    val (iter_exp, init_ccode) = create_cdefval(iter_id, CTypName(get_id("fx_rrbiter_t")),
                        default_tempval_flags(), "", None, init_ccode, for_loc)
                    val start_read_exp = make_call(get_id("FX_RRB_START_READ"),
                        [:: c_et_exp, col_exp, iter_exp ],
                        std_CTypVoidPtr, for_loc)
                    val ptr_id = gen_idc(cm_idx, "ptr_" + colname)
                    val (ptr_exp, init_ccode) = create_cdefval(ptr_id, c_et_ptr,
                        default_tempval_flags(), "", Some(start_read_exp), init_ccode, for_loc)
                    val get_vec_elem = CExpUnary(COpDeref, ptr_exp, (c_et, for_loc))
                    val incr_call_exp = make_call(get_id("FX_RRB_NEXT"),
                        [:: c_et_exp, iter_exp, ptr_exp ], std_CTypVoidPtr, for_loc)
                    val incr_exp = CExpBinary(COpAssign, ptr_exp, incr_call_exp, (CTypVoid, for_loc))
                    val elem_flags =
                        if is_ktyp_scalar(et) { default_tempvar_flags() }
                        else { default_tempref_flags() }
                    ([], i_exps, n_exps, for_checks, incr_exp :: incr_exps,
                    init_checks, init_ccode, pre_body_ccode,
                    (iter_val_i, get_vec_elem, elem_flags) :: body_elems, post_checks)
                | _ =>
                    throw compile_err(for_loc, for_err_msg(for_idx, nfors, k,
                        f"cannot iterate over '{atom2str(a)}' of type '{ktyp}'; it needs to be array, list, vector or string"))
                }
            | _ => throw compile_err(for_loc, for_err_msg(for_idx, nfors, k,
                        "unsupported type of the for loop iteration domain"))
            }
            (lists_i + list_exps, i_exps, n_exps, for_checks, incr_exps,
            init_checks, init_ccode, pre_body_ccode, body_elems, post_checks)
        }
        /*val default_exp = "<complex_exp>"
        println(f"i_exps: {[::for i_exp <- i_exps {| CExpIdent(i, _) => string(i) | _ => default_exp} ]}")
        println(f"n_exps: {[::for n_exp <- n_exps {| CExpIdent(i, _) => string(i) | _ => default_exp} ]}")*/
        /* add initial size checks */
        val init_ccode = add_size_eq_check(init_checks.rev(), init_ccode, lbl, for_loc)
        /* in the case of 1D arrays put pre_body_ccode immediately after initialization code */
        val (init_ccode, pre_body_ccode) =
            if ndims > 1 { (init_ccode, pre_body_ccode) }
            else { (pre_body_ccode + init_ccode, []) }
        /* add "post" checks, if needed */
        /* when there are list checks and a loop counter, "counter reached its limit" is added
           to the list checks; a single list check without any counter needs no post check at all */
        val post_checks = post_checks.rev()
        val post_checks =
            if post_checks != [] && i_exps != []
                { CExpBinary(COpCmp(CmpEQ), i_exps.hd(), n_exps.hd(), (CTypBool, end_for_loc)) :: post_checks }
            else if post_checks.length() > 1 { post_checks }
            else { [] }
        val post_ccode = add_size_post_check(post_checks, [], lbl, end_for_loc)
        /* form for-loop headers */
        /* the extra checks and the extra increments (for_checks, incr_exps)
           are attached only to the first of the formed headers (k_final == 0) */
        val fold k_final=0, for_headers=[] for i_exp <- i_exps, n_exp <- n_exps {
            val ifor_loc = get_cexp_loc(n_exp)
            val init_exps = [:: make_assign(i_exp, make_int_exp(0, ifor_loc)) ]
            val check_exp = CExpBinary(COpCmp(CmpLT), i_exp, n_exp, (CTypBool, ifor_loc))
            val incr_exps_i = [:: CExpUnary(COpSuffixInc, i_exp, (CTypInt, ifor_loc)) ]
            val (check_exp, incr_exps_i) =
            if k_final > 0 {
                (check_exp, incr_exps_i)
            } else {
                val fold check_exp = check_exp for e <- for_checks.rev() {
                    CExpBinary(COpLogicAnd, check_exp, e, (CTypBool, ifor_loc))
                }
                (check_exp, incr_exps_i + incr_exps.rev())
            }
            (k_final + 1, (Some(CTypInt), init_exps, Some(check_exp), incr_exps_i) :: for_headers)
        }
        /* if we have open loop or loop over lists (i.e. i_exps and n_exps are empty lists),
           we still need to form the for-loop statement */
        val for_headers =
            if k_final > 0 { for_headers }
            else {
                val fold check_exp_opt = (None: cexp_t?) for check_i <- for_checks.rev() {
                    Some(match check_exp_opt {
                    | Some e => CExpBinary(COpLogicAnd, e, check_i, (CTypBool, for_loc))
                    | _ => check_i
                    })
                }
                /* NOTE(review): unlike the branch above, incr_exps is used here without .rev(),
                   i.e. the increments go in the order opposite to the order of the clauses;
                   confirm whether that is intended */
                [:: (None, [], check_exp_opt, incr_exps)]
            }
        (for_headers.rev(), list_exps, i_exps, n_exps, init_ccode, pre_body_ccode, body_elems, post_ccode)
    }

    /*
        Declares the loop body values (see body_elems in process_for) in the beginning of the loop body.
        For each (id, expression, flags) triple a local value of the expression type is added
        to body_ccode: via add_local_tempref() when the flags request a temporary reference,
        via add_local() with the expression as the initializer otherwise.
        Returns the extended body_ccode.
    */
    fun decl_for_body_elems(body_elems: (id_t, cexp_t, val_flags_t) list, body_ccode: ccode_t) =
        fold acc_ccode = body_ccode for (elem_id, elem_exp, elem_flags) <- body_elems {
            val (elem_ctyp, elem_loc) = get_cexp_ctx(elem_exp)
            val added =
                if !elem_flags.val_flag_tempref {
                    add_local(elem_id, elem_ctyp, elem_flags, Some(elem_exp), acc_ccode, elem_loc)
                } else {
                    add_local_tempref(elem_id, elem_ctyp, elem_flags, elem_exp, acc_ccode, elem_loc)
                }
            /* only the updated code matters here, the created expression is not needed */
            added.1
        }

    /*
        Generates C code for the list of cases of a match-like construct.

        cases         - list of (checks, action) pairs. A pair with the empty list of checks
                        is the default case; a second default case is reported as a compile error.
        dstexp_r      - passed as-is to kexp2cexp() when each action is converted.
        ccode         - the code generated so far; the produced statements are prepended to it.
                        (The code lists here look to be kept in reverse order, i.e.
                        the most recent statement is at the head - see the use of rccode2stmt().)
        is_catch_case - only selects the name of the end label: "endcatch" or "endmatch".
        kloc          - location of the whole construct; its end location is used for
                        the end label, the "no match" throw and the final exception check.

        Returns the updated ccode.
    */
    fun process_cases(cases: (kexp_t list, kexp_t) list, dstexp_r: cexp_t? ref,
                      ccode: ccode_t, is_catch_case: bool, kloc: loc_t)
    {
        val end_loc = get_end_loc(kloc)
        val endmatch = make_label(if is_catch_case {"endcatch"} else {"endmatch"}, end_loc)
        /* process the cases one by one; the code of each case is pushed
           to the head of all_cases_ccode, so the last case comes first in the result */
        val fold have_default = false, em_label_used = false,
                 have_epilogues = false, have_complex_branches = false,
                 all_cases_ccode = [] for (checks_i, action_i) <- cases {
            /* convert every check separately, starting from the empty code list, so that
               the j-th element of pre_cchecks_i contains only the code emitted for the j-th check.
               Both output lists are built by prepending, i.e. they are reversed w.r.t. checks_i */
            val (cchecks_i, pre_cchecks_i) =
            fold checks_i = [], pre_checks_i = [] for check_ij <- checks_i {
                val (ccheck_ij, ccode_ij) = kexp2cexp(check_ij, ref None, [])
                (ccheck_ij :: checks_i, ccode_ij :: pre_checks_i)
            }
            val ai_loc = get_kexp_loc(action_i)
            /* a case without checks is the default one; only one such case is allowed */
            val new_have_default =
                match checks_i {
                | [] =>
                    if have_default { throw compile_err(ai_loc, "cgen: more than one default action") }
                    else { true }
                | _ => have_default
                }
            /* for each action we create a dedicated scope with its own cleanup section;
                this is because it can be very lengthy match expressions with
                many cases (like in compilers), so if we put all the
                non-trivial locals into the common scope, the cleanup section
                will be very inefficient */
            val ai_end_loc = get_end_loc(ai_loc)
            val (ai_ccode, em_label_used_i, have_epilogue_i) =
            match action_i {
            | KExpThrow _ =>
                /* the action is just 'throw': it's converted without opening a new block context,
                   and neither epilogue nor jump to the end label is generated for it */
                val (_, ai_ccode) = kexp2cexp(action_i, dstexp_r, [])
                (ai_ccode, false, false)
            | _ =>
                new_block_ctx(BlockKind_Case, ai_loc)
                val (_, ai_ccode) = kexp2cexp(action_i, dstexp_r, [])
                val bctx_i = curr_block_ctx(kloc)
                val {bctx_prologue, bctx_cleanup, bctx_label, bctx_label_used} = *bctx_i
                val prologue = bctx_prologue
                /* the epilogue is the reversed cleanup section of the block;
                   the block label is appended to it only if the label has been used */
                val epilogue = bctx_cleanup.rev()
                val epilogue = if bctx_label_used == 0 { epilogue }
                               else { epilogue + [:: CStmtLabel(bctx_label, ai_end_loc)] }
                pop_block_ctx(ai_end_loc)
                /* 'goto endmatch' is put to the head of the epilogue, unless the default case
                   has been met already (this case included) */
                val (em_label_used_i, epilogue) =
                    if new_have_default { (false, epilogue) }
                    else { (true, CStmtGoto(endmatch, ai_end_loc) :: epilogue) }
                /* combine the parts: epilogue, then the action code, then prologue */
                val ai_ccode = epilogue + (ai_ccode + prologue)
                (ai_ccode, em_label_used_i, true)
            }
            /* the branch is "simple" when there are no checks at all or
               when there is a single check with empty code before it */
            val complex_branch_i =
                match (cchecks_i, pre_cchecks_i) {
                | ([], []) => false
                | ([:: c], [:: []]) => false
                | _ => true
                }
            val case_ccode =
                match (cchecks_i, pre_cchecks_i) {
                | ([], []) => ai_ccode
                | _ =>
                    val ai_stmt = rccode2stmt(ai_ccode, ai_loc)
                    /* generate the nested if statement;
                    [TODO] need to replace it with one
                    `if (expi0 && expi1 && ... && expi{n-1}) { action_i }`
                    if possible (i.e. when all pre_check_ij's are empty) */
                    /* the lists are reversed, so the iteration starts from the last check:
                       it gives the innermost 'if', whereas the first check gives the outermost one */
                    fold case_ccode = [:: ai_stmt]
                        for check_ij <- cchecks_i, pre_check_ij <- pre_cchecks_i {
                            val case_stmt = rccode2stmt(case_ccode, ai_end_loc)
                            val checkij_loc = get_cexp_loc(check_ij)
                            val if_stmt = make_if(check_ij, case_stmt,
                                                  CStmtNop(ai_end_loc), checkij_loc)
                            if_stmt :: pre_check_ij
                        }
                }
            (new_have_default,
            em_label_used || em_label_used_i,
            have_epilogues || have_epilogue_i,
            have_complex_branches || complex_branch_i,
            case_ccode :: all_cases_ccode)
        }
        val parent_lbl = curr_block_label(end_loc)
        /* when there is no default case, add FX_FAST_THROW(FX_EXN_NoMatchError, <parent label>)
           as the very last "case" */
        val all_cases_ccode =
            if have_default { all_cases_ccode }
            else {
                val no_match_err = make_id_t_exp(get_id("FX_EXN_NoMatchError"), CTypCInt, end_loc)
                val throw_no_match = make_call(std_FX_FAST_THROW, [:: no_match_err, parent_lbl ], CTypVoid, end_loc)
                [:: CExp(throw_no_match)] :: all_cases_ccode
            }
        val (em_label_used, ccode) =
        match (have_complex_branches, all_cases_ccode) {
        | (false, else_s :: ifs) =>
            /* all the branches are simple: form a single chained
               'if (c0) {...} else if (c1) {...} ... else {...}' statement.
               The head of all_cases_ccode (the last case) becomes the final 'else' branch;
               each of the remaining cases must be a single 'if' without 'else' branch */
            val fold complex_if = rccode2stmt(else_s, end_loc) for s_i <- ifs {
                match s_i {
                | [:: CStmtIf(c_i, then_i, CStmtNop _, loc_i)] =>
                    /* drop the trailing 'goto endmatch' from 'then' branch, if any */
                    val then_i =
                    match stmt2ccode(then_i).rev() {
                    | (CStmtGoto (i, _) :: rest) when i == endmatch => rccode2stmt(rest, loc_i)
                    | _ => then_i
                    }
                    make_if(c_i, then_i, complex_if, loc_i)
                | _ => throw compile_err(end_loc, "cgen: unexpected statement in the chained match statement")
                }
            }
            /* the end label is reported as unused in this case */
            (false, complex_if :: ccode)
        | _ =>
            /* otherwise just put the code of all the cases together before the incoming ccode */
            (em_label_used, all_cases_ccode.concat() + ccode)
        }

        /* add the end label if some of the cases jump to it */
        val ccode = if !em_label_used { ccode }
                    else { CStmtLabel(endmatch, end_loc) :: ccode }
        /* if at least one action has got its own block with epilogue,
           add FX_CHECK_EXN(<parent label>) after all the cases */
        val ccode = if !have_epilogues { ccode }
                    else {
                        val check_exn = make_call(std_FX_CHECK_EXN, [:: parent_lbl], CTypVoid, end_loc)
                        CExp(check_exn) :: ccode
                    }
        ccode
    }

    /*
        Converts K-form expression to C-form expression.
        cases:
           - input kexp is void:
                no expression should be stored anywhere,
                just need to update ccode and return "nop"
           - non-void expression, the destination is pre-defined (*dstexp_r is not None):
                the result should be stored to that pre-defined destination.
                returns the destination expression
           - non-void expression, there is no pre-defined destination (*dstexp_r is None):
                the result should be returned as expression;
                if needed, some temporary id is generated where the result is stored.
    */
    fun kexp2cexp(kexp: kexp_t, dstexp_r: cexp_t? ref, ccode: ccode_t)
    {
        val (ktyp, kloc) = get_kexp_ctx(kexp)
        val ctyp = C_gen_types.ktyp2ctyp(ktyp, kloc)
        val dummy_exp = make_dummy_exp(kloc)
        val (assign, result_exp, ccode) =
        match kexp {
        | KExpNop _ => (false, dummy_exp, ccode)
        | KExpBreak _ => val break_stmt = make_break_stmt(kloc)
                         (false, dummy_exp, break_stmt :: ccode)
        | KExpContinue _ => val continue_stmt = make_continue_stmt(kloc)
                            (false, dummy_exp, continue_stmt :: ccode)
        | KExpReturn (a_opt, _) =>
            val bctx = curr_block_ctx(kloc)
            bctx->bctx_return_used += 1
            return_used += 1
            val ccode = match (a_opt, *func_dstexp_r) {
                | (Some(a), Some(dst_exp)) =>
                    val (e, ccode) = atom2cexp(a, ccode, kloc)
                    val ctyp = get_cexp_typ(e)
                    C_gen_types.gen_copy_code(e, dst_exp, ctyp, ccode, kloc)
                | _ => ccode
                }
            val lbl = curr_block_label(kloc)
            val ret_exp = CExp(make_call(get_id("FX_RETURN"), [:: lbl], CTypVoid, kloc))
            (false, dummy_exp, ret_exp :: ccode)
        | KExpAtom (a, _) => val (e, ccode) = atom2cexp(a, ccode, kloc)
                             val e = fix_nil(e, ktyp)
                             (true, e, ccode)
        | KExpBinary (bop, a1, a2, _) =>
            val (int_divmod_operands, save_and_check) =
                match bop {
                | OpDiv | OpMod =>
                    val f1 = is_ktyp_integer(get_atom_ktyp(a1, kloc), true)
                    val f2 = is_ktyp_integer(get_atom_ktyp(a2, kloc), true)
                    /* if a2 is constant, k_cfold_dealias stage has already checked
                        that a2 is non-zero; we can skip non-zero check here */
                    val nonlit = match a2 { | AtomId _ => true | _ => false}
                    (f1 && f2, f2 && nonlit)
                | _ => (false, false)
                }
            val (ce1, ccode) = atom2cexp(a1, ccode, kloc)
            val (ce2, ccode) = atom2cexp_(a2, save_and_check, ccode, kloc)
            match bop {
            | OpPow =>
                val (need_cast, ce1, ce2, rtyp, f) =
                match ctyp {
                | CTypFloat 64 => (false, ce1, ce2, ctyp, get_id("pow"))
                | CTypFloat _ => (false, ce1, ce2, ctyp, get_id("powf"))
                | _ =>
                    val ce1 = CExpCast(ce1, CTypFloat(64), kloc)
                    val ce2 = CExpCast(ce2, CTypFloat(64), kloc)
                    (true, ce1, ce2, CTypFloat(64), get_id("pow"))
                }
                val e = make_call(f, [:: ce1, ce2], rtyp, kloc)
                val e = if need_cast { CExpCast(e, ctyp, kloc) }
                        else { e }
                (true, e, ccode)
            | OpDiv =>
                if save_and_check {
                    val lbl = curr_block_label(kloc)
                    val chk_denom = make_call(get_id("FX_CHECK_DIV_BY_ZERO"), [:: ce2, lbl ], CTypVoid, kloc)
                    val div_exp = CExpBinary(COpDiv, ce1, ce2, (ctyp, kloc))
                    (true, div_exp, CExp(chk_denom) :: ccode)
                } else {
                    (true, CExpBinary(COpDiv, ce1, ce2, (ctyp, kloc)), ccode)
                }
            | OpMod =>
                if save_and_check {
                    val lbl = curr_block_label(kloc)
                    val chk_denom = make_call(get_id("FX_CHECK_DIV_BY_ZERO"), [:: ce2, lbl ], CTypVoid, kloc)
                    val mod_exp = CExpBinary(COpMod, ce1, ce2, (ctyp, kloc))
                    (true, mod_exp, CExp(chk_denom) :: ccode)
                } else if int_divmod_operands {
                    (true, CExpBinary(COpMod, ce1, ce2, (ctyp, kloc)), ccode)
                } else {
                    val (need_cast, ce1, ce2, rtyp, f) =
                    match ctyp {
                    | CTypInt | CTypFloat 32 => (false, ce1, ce2, ctyp, get_id("fmodf"))
                    | CTypFloat 64 => (false, ce1, ce2, ctyp, get_id("fmod"))
                    | _ =>
                        val ce1 = CExpCast(ce1, CTypFloat(64), kloc)
                        val ce2 = CExpCast(ce2, CTypFloat(64), kloc)
                        (true, ce1, ce2, CTypFloat(64), get_id("fmod"))
                    }
                    val e = make_call(f, [:: ce1, ce2], rtyp, kloc)
                    val e = if need_cast { CExpCast(e, ctyp, kloc) }
                            else { e }
                    (true, e, ccode)
                }
            | OpCons =>
                /*
                    l = e1 :: e2;
                    if !dstexp_r = None && (e2 is single-use id from u1vals) && (ce2 is id) then
                        re-use ce2 as l
                    else
                        obtain l using get_dstexp.
                */
                val a2_id = match a2 { | AtomId i when u1vals.mem(i) => i | _ => noid }
                val ce2_id = match ce2 { | CExpIdent (i, _) => i | _ => noid }
                val (reuse_ce2, (l_exp, _)) =
                    if dstexp_r->isnone() && a2_id != noid && ce2_id != noid { (true, (ce2, [])) }
                    else { (false, get_dstexp(dstexp_r, "lst", ctyp, [], kloc)) }
                val ccode = make_cons_call(ce1, ce2, !reuse_ce2, l_exp, ccode, kloc)
                (false, l_exp, ccode)
            | _ =>
                val (c_bop, bop_ctyp) =
                    match bop {
                    | OpAdd => (COpAdd, ctyp)
                    | OpSub => (COpSub, ctyp)
                    | OpMul => (COpMul, ctyp)
                    | OpDiv => (COpDiv, ctyp)
                    | OpShiftLeft => (COpShiftLeft, ctyp)
                    | OpShiftRight => (COpShiftRight, ctyp)
                    | OpBitwiseAnd => (COpBitwiseAnd, match ktyp {KTypBool => CTypInt | _ => ctyp})
                    | OpBitwiseOr => (COpBitwiseOr, match ktyp {KTypBool => CTypInt | _ => ctyp})
                    | OpBitwiseXor => (COpBitwiseXor, ctyp)
                    | OpCmp(cmpop) => (COpCmp(cmpop), ctyp)
                    | OpCons | OpPow | OpMod | OpLogicAnd | OpLogicOr | OpSpaceship | OpDotSpaceship
                    | OpDotAdd | OpDotSub | OpDotMul | OpDotDiv | OpDotMod | OpDotPow | OpDotCmp _ | OpSame | OpRDiv | OpAugBinary(_) =>
                        throw compile_err(kloc, f"cgen: unsupported op '{bop}' at this stage")
                    }
                match (c_bop, get_cexp_typ(ce1)) {
                | (COpCmp(CmpEQ), CTypString) =>
                    val f_exp = get_id("fx_streq")
                    val call_streq = make_call(f_exp, [:: cexp_get_addr(ce1), cexp_get_addr(ce2) ], CTypBool, kloc)
                    (true, call_streq, ccode)
                | (COpAdd, CTypVector _) =>
                    val (dst_exp, ccode) = get_dstexp(dstexp_r, "v", ctyp, ccode, kloc)
                    val call_concat = make_call(get_id("fx_rrb_concat"),
                        [:: cexp_get_addr(ce1), cexp_get_addr(ce2), cexp_get_addr(dst_exp) ], CTypCInt, kloc)
                    val ccode = add_fx_call(call_concat, ccode, kloc)
                    (false, dst_exp, ccode)
                | _ =>
                    val res = CExpBinary(c_bop, ce1, ce2, (bop_ctyp, kloc))
                    val res = if ctyp == bop_ctyp {res} else {CExpCast(res, ctyp, kloc)}
                    (true, res, ccode)
                }
            }
        | KExpUnary (OpMkRef, a1, _) =>
            val (ce1, ccode) = atom2cexp(a1, ccode, kloc)
            val (r_exp, _) = get_dstexp(dstexp_r, "r", ctyp, [], kloc)
            val ccode = make_mkref_call(ce1, r_exp, ccode, kloc)
            (false, r_exp, ccode)
        | KExpUnary (OpDeref, a1, _) =>
            val a_id = match a1 {
                       | AtomId a_id => a_id
                       | _ => throw compile_err(kloc, "cgen: deref operand is not an identifier")
                       }
            val (ce, ccode) = id2cexp(a_id, false, ccode, kloc)
            val n_id = get_id("data")
            (true, cexp_arrow(ce, n_id, ctyp), ccode)
        | KExpUnary (uop, a1, _) =>
            val (ce1, ccode) = atom2cexp(a1, ccode, kloc)
            val c_uop =
                match uop {
                | OpPlus => COpPlus
                | OpNegate => COpNegate
                | OpBitwiseNot => COpBitwiseNot
                | OpLogicNot => COpLogicNot
                | OpDeref | OpMkRef | OpExpand | OpDotMinus | OpApos =>
                    throw compile_err(kloc, f"cgen: unsupported unary op '{uop}'")
                }
            (true, CExpUnary(c_uop, ce1, (ctyp, kloc)), ccode)
        | KExpIntrin (intr, args, _) =>
            match (intr, args) {
            | (IntrinVariantTag, [:: v]) =>
                val (cv, ccode) = atom2cexp(v, ccode, kloc)
                val ktyp = get_atom_ktyp(v, kloc)
                val {ktp_ptr} = K_annotate.get_ktprops(ktyp, kloc)
                val extract_ctag =
                    if ktp_ptr { make_call(std_FX_REC_VARIANT_TAG, [:: cv], CTypCInt, kloc) }
                    else { cexp_mem(cv, get_id("tag"), CTypCInt) }
                val extract_ctag =
                    match ktyp {
                    | KTypName tn =>
                        match kinfo_(tn, kloc) {
                        | KVariant (ref {kvar_flags, kvar_cases}) =>
                            val have_tag = kvar_flags.var_flag_have_tag
                            val is_recursive = kvar_flags.var_flag_recursive
                            val ncases = kvar_cases.length()
                            if have_tag { extract_ctag }
                            else if ncases == 1 && !is_recursive { make_int_exp(1, kloc) }
                            else if ncases <= 2 && is_recursive {
                                CExpBinary(COpAdd,
                                    CExpBinary(COpCmp(CmpNE), cv, make_nullptr(kloc), (CTypCInt, kloc)),
                                    CExpLit(KLitInt(1i64), (CTypCInt, kloc)),
                                    (CTypCInt, kloc))
                            } else {
                                throw compile_err(kloc, f"cgen: variant '{pp(tn)}' with no tag has {ncases} cases, is_recursive={is_recursive}")
                            }
                        | _ => throw compile_err(kloc, f"cgen: unexpected type '{idk2str(tn, kloc)}'; should be variant of exception")
                        }
                    | _ => extract_ctag
                    }
                (true, extract_ctag, ccode)
            | (IntrinVariantCase, [:: v, vn_val]) =>
                val (cv, ccode) = atom2cexp(v, ccode, kloc)
                val vktyp = get_atom_ktyp(v, kloc)
                val {ktp_ptr} = K_annotate.get_ktprops(vktyp, kloc)
                match (vktyp, vn_val) {
                | (KTypExn, AtomId vn) =>
                    match cinfo_(vn, kloc) {
                    | CExn (ref {cexn_data}) =>
                        val exn_data = cexp_mem(cv, get_id("data"), CTypAny)
                        val exn_data = make_call(get_id("FX_EXN_DATA"), [:: CExpTyp(CTypName(cexn_data), kloc), exn_data ], ctyp, kloc)
                        (true, exn_data, ccode)
                    | _ =>
                        throw compile_err(kloc,
                            f"cgen: information about exception '{idk2str(vn, kloc)}' is not found")
                    }
                | (_, AtomLit(KLitInt(idx))) =>
                    val case_id = match deref_ktyp(vktyp, kloc) {
                        | KTypName(n) =>
                            match kinfo_(n, kloc) {
                            | KVariant (ref {kvar_cases}) => get_orig_id(kvar_cases.nth(int(idx)-1).0)
                            | _ => throw compile_err(kloc, f"cgen: invalid type '{pp(n)}'; variant is expected")
                            }
                        | _ => throw compile_err(kloc, f"cgen: invalid type '{vktyp}'; variant is expected")
                        }
                    val cvu = if ktp_ptr { cexp_arrow(cv, get_id("u"), CTypAny) }
                              else { cexp_mem(cv, get_id("u"), CTypAny) }
                    val celem = cexp_mem(cvu, case_id, ctyp)
                    (true, celem, ccode)
                | _ => throw compile_err(kloc, "cgen: invalid IntrinVariantCase 2nd parameter")
                }
            | (IntrinListHead, [:: l]) =>
                val (cl, ccode) = atom2cexp(l, ccode, kloc)
                (true, cexp_arrow(cl, get_id("hd"), ctyp), ccode)
            | (IntrinListTail, [:: l]) =>
                val (cl, ccode) = atom2cexp(l, ccode, kloc)
                (true, cexp_arrow(cl, get_id("tl"), ctyp), ccode)
            | (IntrinQueryIface, [:: src]) =>
                val dst_iface = match get_cinterface_opt(ctyp, kloc) {
                    | Some(iface) => iface
                    | _ => throw compile_err(kloc,
                        "the destination type '{ctyp2str(ctyp, kloc).0}' of 'IntrinQueryIface' intrinsic is not an inteface")
                    }
                val (src_exp, ccode) = atom2cexp(src, ccode, kloc)
                val (dst_exp, ccode) = get_dstexp(dstexp_r, "iface", ctyp, ccode, kloc)
                val src_typ = get_cexp_typ(src_exp)
                val get_iface_exp = match src_typ {
                    | CTypName(tn) =>
                        match cinfo_(tn, kloc) {
                        | CInterface _ =>
                            make_call(std_fx_query_iface,
                                [:: cexp_get_addr(src_exp), make_id_exp(dst_iface->ci_id, kloc), cexp_get_addr(dst_exp) ],
                                CTypCInt, kloc)
                        | CTyp (ref {ct_ifaces}) =>
                            var idx = -1
                            for iface@i <- ct_ifaces {
                                if same_or_parent(iface, dst_iface->ci_name, kloc) {
                                    idx = i; break
                                }
                            }
                            if idx < 0 {
                                throw compile_err(kloc,
                                    f"the interface '{dst_iface->ci_cname}' is not implemented by '{ctyp2str(src_typ, kloc).0}'")
                            }
                            make_call(std_fx_make_iface,
                                [:: src_exp, make_int_exp(idx, kloc), cexp_get_addr(dst_exp) ],
                                CTypCInt, kloc)
                        | _ =>
                            throw compile_err(kloc,
                                f"invalid type '{ctyp2str(src_typ, kloc).0}' of the first argument of 'IntrinQueryInterface'")
                        }
                    | _ =>
                        throw compile_err(kloc,
                            f"invalid type '{ctyp2str(src_typ, kloc).0}' of the first argument of 'IntrinQueryInterface'")
                    }
                val ccode = add_fx_call(get_iface_exp, ccode, kloc)
                (false, dst_exp, ccode)
            | (IntrinGetObject, [:: src]) =>
                val (src_exp, ccode) = atom2cexp(src, ccode, kloc)
                val src_typ = get_cexp_typ(src_exp)
                val src_iface = match get_cinterface_opt(src_typ, kloc) {
                    | Some(iface) => iface
                    | _ => throw compile_err(kloc,
                        "the argument of 'IntrinGetObject' of type '{ctyp2str(src_typ, kloc).0}' is not an inteface")
                    }
                val (dst_exp, ccode) = get_dstexp(dstexp_r, "iface", ctyp, ccode, kloc)
                val get_obj_exp = match ctyp {
                    | CTypName(tn) =>
                        match cinfo_(tn, kloc) {
                        | CTyp (ref {ct_ifaces}) =>
                            var idx = -1
                            for iface@i <- ct_ifaces {
                                if same_or_parent(iface, src_iface->ci_name, kloc) {
                                    idx = i; break
                                }
                            }
                            if idx < 0 {
                                throw compile_err(kloc,
                                    f"the interface '{src_iface->ci_cname}' is not implemented by '{ctyp2str(ctyp, kloc).0}'")
                            }
                            make_call(std_fx_get_object,
                                [:: cexp_get_addr(src_exp), make_int_exp(idx, kloc), cexp_get_addr(dst_exp) ],
                                CTypCInt, kloc)
                        | _ =>
                            throw compile_err(kloc,
                                f"invalid destination type '{ctyp2str(ctyp, kloc).0}'. It must be an object that implements some interfaces")
                        }
                    | _ =>
                        throw compile_err(kloc,
                            f"invalid destination type '{ctyp2str(ctyp, kloc).0}'. It must be an object that implements some interfaces")
                    }
                val ccode = add_fx_call(get_obj_exp, ccode, kloc)
                (false, dst_exp, ccode)
            | (IntrinPopExn, []) =>
                val (dst_exp, ccode) = get_dstexp(dstexp_r, "curr_exn", CTypExn, ccode, kloc)
                val fx_status_exp = make_fx_status(kloc)
                val e = make_call(get_id("fx_exn_get_and_reset"), [:: fx_status_exp, cexp_get_addr(dst_exp)], CTypVoid, kloc)
                (false, dst_exp, CExp(e) :: ccode)
            | (IntrinStrConcat, al) =>
                val fold strs = [], ccode = ccode for a <- al {
                    val (c_exp, ccode) = atom2cexp_(a, true, ccode, kloc)
                    val s_exp =
                        match (a, get_cexp_typ(c_exp)) {
                        | (AtomLit(KLitChar c), CTypUniChar) =>
                            make_call(get_id("FX_MAKE_STR1"),
                                [:: make_lit_exp(KLitString(string(c)), kloc) ], CTypString, kloc)
                        | (AtomId n, CTypUniChar) =>
                            make_call(get_id("FX_MAKE_VAR_STR1"), [:: c_exp], CTypString, kloc)
                        | _ => c_exp
                        }
                    (s_exp :: strs, ccode)
                }
                val strs_id = gen_idc(cm_idx, "strs")
                val strs_ctyp = CTypRawArray([::CTypConst], CTypString)
                val strs0 = CExpInit(strs.rev(), (strs_ctyp, kloc))
                val (dst_exp, ccode) = get_dstexp(dstexp_r, "concat_str", CTypString, ccode, kloc)
                val (strs_exp, sub_ccode) = create_cdefval(strs_id, strs_ctyp, default_tempval_flags(), "", Some(strs0), [], kloc)
                val call_exp = make_call(get_id("fx_strjoin"),
                                [:: make_nullptr(kloc), make_nullptr(kloc), make_nullptr(kloc),
                                   strs_exp, make_int_exp(strs.length(), kloc), cexp_get_addr(dst_exp)],
                                CTypInt, kloc)
                val sub_ccode = add_fx_call(call_exp, sub_ccode, kloc)
                val ccode = rccode2stmt(sub_ccode, kloc) :: ccode
                (false, dst_exp, ccode)
            | (IntrinGetSize, [:: arr_or_str]) =>
                val (arr_exp, ccode) = atom2cexp(arr_or_str, ccode, kloc)
                val c_e =
                    match get_atom_ktyp(arr_or_str, kloc) {
                    | KTypString => make_call(get_id("FX_STR_LENGTH"), [:: arr_exp], CTypInt, kloc)
                    | KTypArray _ => make_call(get_id("FX_ARR_SIZE"), [:: arr_exp, make_int_exp(0, kloc) ], CTypInt, kloc)
                    | KTypVector _ => make_call(get_id("FX_RRB_SIZE"), [:: arr_exp], CTypInt, kloc)
                    | ktyp =>
                        throw compile_err( kloc,
                            f"cgen: unsupported container type {ktyp} of {atom2str(arr_or_str)} in KExpIntrin(IntrinGetSize...)")
                    }
                (true, c_e, ccode)
            | (IntrinGetSize, [:: arr_or_str, AtomLit(KLitInt i)]) =>
                val (arr_exp, ccode) = atom2cexp(arr_or_str, ccode, kloc)
                val c_e =
                    match (get_atom_ktyp(arr_or_str, kloc), i) {
                    | (KTypString, 0i64) => make_call(get_id("FX_STR_LENGTH"), [:: arr_exp], CTypInt, kloc)
                    | (KTypVector _, 0i64) => make_call(get_id("FX_RRB_SIZE"), [:: arr_exp], CTypInt, kloc)
                    | (KTypArray (ndims, _), i) =>
                        if !(0i64 <= i && i < int64(ndims)) {
                            throw compile_err(kloc, f"array dimension index {i}i is beyond dimensionality {ndims}")
                        }
                        make_call(get_id("FX_ARR_SIZE"), [:: arr_exp, make_int__exp(i, kloc) ], CTypInt, kloc)
                    | _ => throw compile_err(kloc, "cgen: unsupported container type in KExpIntrin(IntrinGetSize...)")
                    }
                (true, c_e, ccode)
            | (IntrinCheckIdx, [:: arrsz, idx]) =>
                val lbl = curr_block_label(kloc)
                val (arrsz_exp, ccode) = atom2cexp(arrsz, ccode, kloc)
                val (idx_exp, ccode) = atom2cexp(idx, ccode, kloc)
                val chk = make_call(get_id("FX_CHKIDX_SCALAR"), [:: arrsz_exp, idx_exp, lbl ], CTypVoid, kloc)
                (false, dummy_exp, CExp(chk) :: ccode)
            | (IntrinCheckIdxRange, [:: arrsz, a, b, delta, scale, shift]) =>
                val lbl = curr_block_label(kloc)
                val (arrsz_exp, ccode) = atom2cexp(arrsz, ccode, kloc)
                val (a_exp, ccode) = atom2cexp(a, ccode, kloc)
                val (b_exp, ccode) = atom2cexp(b, ccode, kloc)
                val (delta_exp, ccode) = atom2cexp(delta, ccode, kloc)
                val (scale_exp, ccode) = atom2cexp(scale, ccode, kloc)
                val (shift_exp, ccode) = atom2cexp(shift, ccode, kloc)
                val chk = make_call(get_id("FX_CHKIDX_RANGE"), [:: arrsz_exp, a_exp, b_exp, delta_exp,
                                    scale_exp, shift_exp, lbl], CTypVoid, kloc)
                (false, dummy_exp, CExp(chk) :: ccode)
            | (IntrinMakeFPbyFCV, [:: AtomId(fname)]) =>
                val (dst_exp, ccode) = get_dstexp(dstexp_r, "make_fp_by_fcv", ctyp, ccode, kloc)
                val macro = CExp(make_call( std_FX_MAKE_FP_BY_FCV, [:: make_id_t_exp(fname, std_CTypVoidPtr, kloc), dst_exp], CTypVoid, kloc ))
                (false, dummy_exp, macro :: ccode)
            | (IntrinGEMM, [:: m1, t1, rs1, re1, rd1, cs1, ce1, cd1, m2, t2, rs2, re2, rd2, cs2, ce2, cd2]) =>
                fun handle_idx(range_idx: atom_t, deflt: int64) = match range_idx {
                    | AtomLit(KLitNil _) => AtomLit(KLitInt(deflt))
                    | _ => range_idx
                }

                val (m1exp, ccode) = atom2cexp(m1, ccode, kloc)
                val (t1exp, ccode) = atom2cexp(t1, ccode, kloc)
                val (rs1exp, ccode) = atom2cexp(handle_idx(rs1, 0i64), ccode, kloc)
                val (re1exp, ccode) = atom2cexp(handle_idx(re1,-1i64), ccode, kloc)
                val (rd1exp, ccode) = atom2cexp(handle_idx(rd1, 1i64), ccode, kloc)
                val (cs1exp, ccode) = atom2cexp(handle_idx(cs1, 0i64), ccode, kloc)
                val (ce1exp, ccode) = atom2cexp(handle_idx(ce1,-1i64), ccode, kloc)
                val (cd1exp, ccode) = atom2cexp(handle_idx(cd1, 1i64), ccode, kloc)
                val (m2exp, ccode) = atom2cexp(m2, ccode, kloc)
                val (t2exp, ccode) = atom2cexp(t2, ccode, kloc)
                val (rs2exp, ccode) = atom2cexp(handle_idx(rs2, 0i64), ccode, kloc)
                val (re2exp, ccode) = atom2cexp(handle_idx(re2,-1i64), ccode, kloc)
                val (rd2exp, ccode) = atom2cexp(handle_idx(rd2, 1i64), ccode, kloc)
                val (cs2exp, ccode) = atom2cexp(handle_idx(cs2, 0i64), ccode, kloc)
                val (ce2exp, ccode) = atom2cexp(handle_idx(ce2,-1i64), ccode, kloc)
                val (cd2exp, ccode) = atom2cexp(handle_idx(cd2, 1i64), ccode, kloc)
                val (dst_exp, ccode) = get_dstexp(dstexp_r, "gemm", ctyp, ccode, kloc)

                val call_exp = make_call(get_id("fx_gemm"),
                    [:: cexp_get_addr(m1exp), t1exp, rs1exp, re1exp, rd1exp, cs1exp, ce1exp, cd1exp,
                      cexp_get_addr(m2exp), t2exp, rs2exp, re2exp, rd2exp, cs2exp, ce2exp, cd2exp,
                       cexp_get_addr(dst_exp)],
                    CTypVoid, kloc)
                val ccode = add_fx_call(call_exp, ccode, kloc)
                (false, dst_exp, ccode)
            | (IntrinMath(s), args) =>
                // Math intrinsic: lowered to a plain C call. The C function name is
                // [prefix] + intrinsic name + [suffix], where the suffix is selected
                // by the K-type of the first argument.
                // The arguments are accumulated in reverse and restored with rev() below.
                val fold cargs = [], ccode = ccode for a <- args {
                    val (c_exp, ccode) = atom2cexp(a, ccode, kloc)
                    (c_exp :: cargs, ccode)
                }
                val fname = pp(s)
                val argtyp = get_atom_ktyp(args.hd(), kloc)
                // These intrinsics are called via their "fx_"-prefixed variants.
                // (The "round" literal used to carry a trailing space and therefore
                // never matched, leaving 'round' without the prefix.)
                val prefix = match fname {
                            | "floor" | "ceil" | "round" | "min" | "max" => "fx_"
                            | _ => ""
                            }
                // "f" suffix for 32-bit floats, "i" for int; nothing otherwise.
                val suffix = (match argtyp {
                            | KTypFloat(32) => "f"
                            | KTypInt => "i"
                            | _ => ""})
                val fname = prefix + fname + suffix
                val call_f = make_call(get_id(fname), cargs.rev(), ctyp, kloc)
                (true, call_f, ccode)
            | (IntrinSaturate(sct), [:: arg]) =>
                // Saturating conversion of a single argument. The name of the called C
                // function is composed of a prefix chosen by the argument K-type and
                // a suffix chosen by the result C type (e.g. "sat_f2" + "u8").
                // `sct` is not used in this arm; the destination kind is taken from `ctyp`.
                val (c_exp, ccode) = atom2cexp(arg, ccode, kloc)
                val argtyp = get_atom_ktyp(arg, kloc)
                // only int, float and double sources are accepted
                val prefix = match argtyp {
                            | KTypInt => "sat_I2"
                            | KTypFloat(32) => "sat_f2"
                            | KTypFloat(64) => "sat_d2"
                            | _ =>
                                throw compile_err(kloc, f"cgen: unsupported argument type {argtyp} of sat_...() intrinsic")
                            }
                // only signed/unsigned integer destinations are accepted;
                // the bit width becomes a part of the function name
                val suffix = match ctyp {
                            | CTypUInt(n) => f"u{n}"
                            | CTypSInt(n) => f"i{n}"
                            | _ =>
                                throw compile_err(kloc, f"cgen: unsupported return type {ctyp2str(ctyp, kloc)} of sat_...() intrinsic")
                            }
                val fname = prefix + suffix
                val call_f = make_call(get_id(fname), [:: c_exp], ctyp, kloc)
                (true, call_f, ccode)
            | (IntrinGetSlice, arr :: idxs) =>
                // Produces a raw pointer into the array `arr` at the position given by `idxs`.
                val (arr_exp, ccode) = atom2cexp(arr, ccode, kloc)
                // the index expressions are collected in reverse order
                val fold i_exps = [], ccode = ccode for i <- idxs {
                    val (i_exp, ccode) = atom2cexp(i, ccode, kloc)
                    (i_exp :: i_exps, ccode)
                }
                // prepending to the reversed list makes literal 0 the last index after rev(),
                // i.e. one more index than provided in `idxs` is passed to the macro
                val i_exps = make_int_exp(0, kloc) :: i_exps
                val ndims = i_exps.length()
                // the access macro is selected by the total number of indices
                val fname = std_FX_PTR_xD.nth(ndims-1)
                // the result type must be a raw pointer; its element type is
                // passed to the macro as the first argument
                val elem_ctyp = match ctyp {
                    | CTypRawPtr(_, elem_ctyp) => elem_ctyp
                    | _ =>
                        throw compile_err(kloc, f"cgen: unexpected return type \
                            {ctyp2str(ctyp, kloc)} of IntrinGetSlice() intrinsic; \
                            should be raw pointer")
                    }
                val get_slice_exp = make_call(fname, CExpTyp(elem_ctyp, kloc) ::
                            arr_exp :: i_exps.rev(), make_ptr(elem_ctyp), kloc)
                (true, get_slice_exp, ccode)
            | (IntrinAccessSlice, [:: ptr, idx]) =>
                // Element access via a raw pointer: generates the C expression `ptr[idx]`
                // of type `ctyp`; no extra statements besides those needed for the operands.
                val (ptr_exp, ccode) = atom2cexp(ptr, ccode, kloc)
                val (i_exp, ccode) = atom2cexp(idx, ccode, kloc)
                val get_elem_exp = CExpBinary(COpArrayElem, ptr_exp, i_exp, (ctyp, kloc))
                (true, get_elem_exp, ccode)
            | _ => throw compile_err(kloc,
                f"cgen: unsupported KExpIntrin({intr}, ...) or the wrong number of arguments ({args.length()})")
            }
        | KExpSeq (el, _) =>
            // Converts a sequence of expressions one by one, threading `ccode` through.
            // All the expressions but the last one get a fresh empty destination
            // (`ref None`) and their resulting C expressions are dropped;
            // the last expression receives the destination of the whole sequence.
            // An empty sequence yields `dummy_exp`.
            fun process_seq(el: kcode_t, ccode: ccode_t) =
                match el {
                | [] => (dummy_exp, ccode)
                | [:: last] => kexp2cexp(last, dstexp_r, ccode)
                | e :: rest => val (_, ccode) = kexp2cexp(e, ref None, ccode)
                               process_seq(rest, ccode)
                }
            val (e, ccode) = process_seq(el, ccode)
            (false, e, ccode)
        | KExpSync(n, e) =>
            // Synchronized block: the body `e` is converted inside its own block context
            // and wrapped into CStmtSync; an exception check against the enclosing
            // block's label is emitted right after it.
            // The label of the enclosing block must be taken before the new context is pushed.
            val parent_lbl = curr_block_label(kloc)
            // the destination is obtained in the outer context, before the nested block is opened
            val (dst_exp, ccode) = get_dstexp(dstexp_r, "t", ctyp, ccode, kloc)
            new_block_ctx(BlockKind_Block, kloc)
            // the body starts with an empty code list of its own
            val (_, sync_ccode) = kexp2cexp(e, dstexp_r, [])
            val bctx_sync = curr_block_ctx(kloc)
            val {bctx_prologue, bctx_cleanup, bctx_label, bctx_label_used} = *bctx_sync
            // the block label is emitted only when something refers to it
            val epilogue = if bctx_label_used == 0 { bctx_cleanup }
                           else { bctx_cleanup + [:: CStmtLabel(bctx_label, kloc)] }
            // NOTE(review): code lists here are built by prepending and converted with
            // rccode2stmt(), so presumably they are in reverse order, i.e. the prologue
            // comes first and the epilogue last in the generated C code - confirm.
            val sync_ccode = epilogue + sync_ccode + bctx_prologue
            val c_e = rccode2stmt(sync_ccode, get_kexp_loc(e))
            pop_block_ctx(kloc)
            val check_exn = make_call(std_FX_CHECK_EXN, [:: parent_lbl], CTypVoid, kloc)
            (false, dst_exp, CExp(check_exn) :: CStmtSync(n, c_e) :: ccode)
        | KExpIf (c, e1, e2, _) =>
            // Conditional expression: the condition is computed into the current code;
            // both branches are converted separately (each starts with an empty code list)
            // and share the same destination reference `dstexp_r`,
            // which is resolved by get_dstexp() before the branches are processed.
            val (cc, ccode) = kexp2cexp(c, ref None, ccode)
            val (dst_exp, ccode) = get_dstexp(dstexp_r, "t", ctyp, ccode, kloc)
            val (_, ccode1) = kexp2cexp(e1, dstexp_r, [])
            val (_, ccode2) = kexp2cexp(e2, dstexp_r, [])
            val c_e1 = rccode2stmt(ccode1, get_kexp_loc(e1))
            val c_e2 = rccode2stmt(ccode2, get_kexp_loc(e2))
            (false, dst_exp, make_if(cc, c_e1, c_e2, kloc) :: ccode)
        | KExpCall (f, args, _) =>
            // Function call. The C argument list is assembled in reverse order:
            // user arguments, then (optionally) the address of the result,
            // then (optionally) the closure data pointer; it's reversed right before the call.
            val fold args = [], ccode = ccode for arg <- args {
                val (carg, ccode) = atom2cexp(arg, ccode, kloc)
                val carg = make_fun_arg(carg, kloc)
                (carg :: args, ccode)
            }
            // when an exception is "called", its constructor function is called instead
            val (f, ci) = match cinfo_(f, kloc) {
                          | CExn (ref {cexn_make}) => (cexn_make, cinfo_(cexn_make, kloc))
                          | ci => (f, ci)
                          }
            val (f_exp, have_out_arg, fv_args, is_nothrow, ccode) =
            match ci {
            | CFun cf =>
                // direct call of a known function
                val {cf_args, cf_rt, cf_flags, cf_cname, cf_loc} = *cf
                ensure_sym_is_defined_or_declared(f, kloc)
                val is_nothrow = cf_flags.fun_flag_nothrow
                val (_, ret_id, _, have_fv_arg) = unpack_fun_args(cf_args, cf_rt, is_nothrow)
                val f_exp = make_id_exp(f, cf_loc)
                // closure data argument:
                //   - none if the function does not have such a parameter;
                //   - null pointer if the function does not use free variables;
                //   - the caller's own fx_fv if the function calls itself;
                //   - otherwise it's an error, such a function must be called via closure
                val fv_args =
                    if !have_fv_arg { [] }
                    else if !cf_flags.fun_flag_uses_fv { [:: make_nullptr(kloc)] }
                    else if f == curr_func(kloc) { [:: make_id_t_exp(get_id("fx_fv"), std_CTypVoidPtr, cf_loc) ] }
                    else {
                        throw compile_err( kloc,
                        f"cgen: looks like lambda lifting did not transform '{cf_cname}' call correctly. " +
                        "Functions that access free variables must be called via closure " +
                        "(except for the case when function calls itself)")
                    }
                // the function has an output parameter iff unpack_fun_args() reported a result id
                (f_exp, ret_id != noid, fv_args, is_nothrow, ccode)
            | CVal ({cv_typ, cv_flags, cv_loc}) =>
                // indirect call via a closure value: the function pointer is taken from
                // the 'fp' member and the closure data from the 'fcv' member.
                // Such calls are always treated as having the output parameter and as non-"nothrow".
                if is_val_global(cv_flags) || cv_flags.val_flag_ctor > 0 {
                    ensure_sym_is_defined_or_declared(f, kloc)
                }
                val (fclo_exp, ccode) = id2cexp(f, false, ccode, kloc)
                val ftyp = deref_ktyp(get_idk_ktyp(f, kloc), kloc)
                val cftyp = C_gen_types.ktyp2ctyp(ftyp, kloc)
                val f_exp = cexp_mem(fclo_exp, get_id("fp"), cftyp)
                val fv_args = [:: cexp_mem(fclo_exp, get_id("fcv"), std_CTypVoidPtr) ]
                (f_exp, true, fv_args, false, ccode)
            | _ => throw compile_err(kloc, f"cgen: the called '{idc2str(f, kloc)}' is not a function nor value")
            }
            if !have_out_arg && ctyp != CTypVoid {
                // the function returns the result directly:
                // the call expression itself is the value, no statement is emitted here
                val args = (fv_args + args).rev()
                val call_exp = CExpCall(f_exp, args, (ctyp, kloc))
                (true, call_exp, ccode)
            } else {
                // the result (if any) is returned via the output parameter;
                // its address is put after the user arguments and before the closure data
                val (args, dst_exp, ccode) =
                if ctyp == CTypVoid {
                    (args, dummy_exp, ccode)
                } else {
                    val (dst_exp, ccode) = get_dstexp(dstexp_r, "res", ctyp, ccode, kloc)
                    (cexp_get_addr(dst_exp) :: args, dst_exp, ccode)
                }
                val args = (fv_args + args).rev()
                // "nothrow" functions return void, all others return the int status
                val fcall_rt = if is_nothrow { CTypVoid } else { CTypCInt }
                val fcall_exp = CExpCall(f_exp, args, (fcall_rt, kloc))
                if is_nothrow {
                    // no status to check, emit the call as a plain statement
                    (false, dst_exp, CExp(fcall_exp) :: ccode)
                } else {
                    // the call is emitted via add_fx_call()
                    val ccode = add_fx_call(fcall_exp, ccode, kloc)
                    (false, dst_exp, ccode)
                }
            }
        | KExpICall (io_pair, idx, args, _) =>
            // Interface method call: the method with index `idx` is fetched from
            // the 'vtbl' member of the interface value and is called with
            // the 'obj' member as the first argument.
            val (io_cexp, ccode) = id2cexp(io_pair, true, ccode, kloc)
            // convert all the arguments to C expressions,
            // add the object pointer to the beginning of the list
            // (the list is built in reverse, so the object pointer is the seed of the fold)
            val obj_cexp = cexp_mem(io_cexp, get_id("obj"), std_CTypVoidPtr)
            val fold args = [:: make_fun_arg(obj_cexp, kloc)], ccode = ccode for arg <- args {
                val (carg, ccode) = atom2cexp(arg, ccode, kloc)
                val carg = make_fun_arg(carg, kloc)
                (carg :: args, ccode)
            }
            // the type of the interface value gives the list of all its methods
            val t = get_cexp_typ(io_cexp)
            val obj_iface = match get_cinterface_opt(t, kloc) {
                | Some(iface) => iface
                | _ => throw compile_err(kloc, f"the first parameter (of type '{ctyp2str(t, kloc).0}') of KExpICall is not an interface")
                }
            val (mname, mt) = obj_iface->ci_all_methods.nth(idx)
            val vtbl = cexp_mem(io_cexp, get_id("vtbl"), std_CTypVoidPtr)
            val mexp = cexp_arrow(vtbl, mname, mt)
            // add return value
            // (non-void result is always passed via the output parameter here)
            val (args, dst_exp, ccode) =
                if ctyp == CTypVoid {
                    (args, dummy_exp, ccode)
                } else {
                    val (dst_exp, ccode) = get_dstexp(dstexp_r, "res", ctyp, ccode, kloc)
                    (cexp_get_addr(dst_exp) :: args, dst_exp, ccode)
                }
            // use nullptr as the pointer to closure data (because methods do not use closures)
            val args = make_nullptr(kloc) :: args
            // the call always has the int return type and is emitted via add_fx_call()
            val mcall_exp = CExpCall(mexp, args.rev(), (CTypCInt, kloc))
            val ccode = add_fx_call(mcall_exp, ccode, kloc)
            (false, dst_exp, ccode)
        | KExpMkTuple _ | KExpMkRecord _ =>
            // Tuple and record construction share the same code; only the prefix
            // of the generated temporary name differs ("tup" vs "rec").
            val (args, prefix) = match kexp {
                | KExpMkTuple(args, _) => (args, "tup")
                | KExpMkRecord(args, _) => (args, "rec")
                | _ => throw compile_err(kloc, "unexpected expression")
                }
            // `noid` means that there is no constructor function for this type
            val tcon = C_gen_types.get_constructor(ctyp, false, kloc)
            // elements are collected in reverse order; they are passed through
            // make_fun_arg() only when they become arguments of the constructor call
            val fold cargs = [], ccode = ccode for a <- args {
                val (ca, ccode) = atom2cexp(a, ccode, kloc)
                val ca = if tcon == noid { ca } else { make_fun_arg(ca, kloc) }
                (ca :: cargs, ccode)
            }
            if tcon != noid {
                // call the constructor; the destination address is the last argument
                val (t_exp, ccode) = get_dstexp(dstexp_r, prefix, ctyp, ccode, kloc)
                val call_mktup = make_call(tcon, cargs.rev() + [:: cexp_get_addr(t_exp)], CTypVoid, kloc)
                (false, t_exp, CExp(call_mktup) :: ccode)
            } else {
                // no constructor: define a fresh temporary value
                // with the initializer list made of the elements
                val tup = gen_idc(cm_idx, prefix)
                val e0 = CExpInit(cargs.rev(), (ctyp, kloc))
                val (t_exp, ccode) = create_cdefval(tup, ctyp, default_tempval_flags(), "", Some(e0), ccode, kloc)
                (true, t_exp, ccode)
            }
        | KExpMkClosure (make_fp, f, args, _) =>
            // Closure construction. The temporary is named after the function, with "_fp" suffix.
            val fp_prefix = pp(f) + "_fp"
            if args == [] && make_fp == noid {
                // no captured values and no closure constructor:
                // the closure is initialized in place as {f, null closure data}
                val fp_id = gen_idc(cm_idx, fp_prefix)
                ensure_sym_is_defined_or_declared(f, kloc)
                val f_exp = make_id_exp(f, kloc)
                val e0 = CExpInit([::f_exp, make_nullptr(kloc) ], (ctyp, kloc))
                val (fp_exp, ccode) = create_cdefval(fp_id, ctyp, default_tempval_flags(), "", Some(e0), ccode, kloc)
                (true, fp_exp, ccode)
            } else {
                // otherwise call the closure constructor `make_fp` with the given arguments;
                // the destination address is passed as the last argument
                val fold cargs = [], ccode = ccode for a <- args {
                    val (ca, ccode) = atom2cexp(a, ccode, kloc)
                    val ca = make_fun_arg(ca, kloc)
                    (ca :: cargs, ccode)
                }
                val (fp_exp, ccode) = get_dstexp(dstexp_r, fp_prefix, ctyp, ccode, kloc)
                ensure_sym_is_defined_or_declared(make_fp, kloc)
                val call_mkclo = make_call(make_fp, cargs.rev() + [:: cexp_get_addr(fp_exp)], CTypVoid, kloc)
                (false, fp_exp, CExp(call_mkclo) :: ccode)
            }
        | KExpMkArray (all_literals, arows, _) =>
            // Array construction. There are 3 cases:
            //   1. some of the elements are "expanded" collections (the flag in the
            //      (flag, atom) pair is set): the array is composed with fx_compose_arr();
            //   2. all the elements are literals: the data is put into a plain C array
            //      added to `glob_data_ccode` and the array is made with std_fx_make_arr;
            //   3. otherwise: make_make_arr_call() is used.
            val have_expanded = !all_literals && exists(for arow <- arows { exists(for (f, _) <- arow {f}) })
            val (dims, elem_ctyp) =
                match ctyp {
                | CTypArray (dims, elem_ctyp) => (dims, elem_ctyp)
                | _ => throw compile_err(kloc, "cgen: invalid output type of array construction expression")
                }
            if have_expanded {
                val (arr_exp, ccode) = get_dstexp(dstexp_r, "arr", ctyp, ccode, kloc)
                // 'scalars' is a plain array for the scalar elements that are not identifiers
                // (and so their address cannot be taken directly)
                val scalars_id = gen_idc(cm_idx, "scalars")
                val scalars_exp = make_id_t_exp(scalars_id, make_ptr(elem_ctyp), kloc)
                // For each element one tag and one pointer are collected (in reverse order):
                //   tag 0 - scalar element;
                //   tag d - expanded d-dimensional array;
                //   tag 100 - expanded list; tag 110 - expanded vector;
                //   tag 127 - added after each row.
                val (_, scalars_data, tags_data, arr_data, ccode) =
                    fold nscalars = 0, scalars_data = [], tags_data = [],
                         arr_data = [], ccode = ccode for arow <- arows {
                    val fold nscalars = nscalars, scalars_data = scalars_data, tags_data = tags_data,
                             arr_data = arr_data, ccode = ccode for (f, a) <- arow {
                        val (e, ccode) = atom2cexp(a, ccode, kloc)
                        if f {
                            // expanded collection: arrays and vectors are passed by address,
                            // lists are passed as is
                            val elem_ktyp = get_atom_ktyp(a, kloc)
                            val (tag, elem_ptr) =
                            match deref_ktyp(elem_ktyp, kloc) {
                            | KTypArray (d, _) => (d, cexp_get_addr(e))
                            | KTypList _ => (100, e)
                            | KTypVector _ => (110, cexp_get_addr(e))
                            | _ => throw compile_err(kloc, f"cgen: the expanded structure {atom2str(a)} is not an array, vector or list")
                            }
                            (nscalars, scalars_data, make_int_exp(tag, kloc) :: tags_data, elem_ptr :: arr_data, ccode)
                        } else {
                            // scalar element: take the address of the identifier or
                            // store the value into 'scalars' and use 'scalars + index'
                            val (nscalars, scalars_data, arr_data_elem) =
                            match e {
                            | CExpIdent _ => (nscalars, scalars_data, cexp_get_addr(e))
                            | _ =>
                                (nscalars + 1, e :: scalars_data,
                                CExpBinary(COpAdd, scalars_exp, make_int_exp(nscalars, kloc), (std_CTypVoidPtr, kloc)))
                            }
                            (nscalars, scalars_data, make_int_exp(0, kloc) :: tags_data, arr_data_elem :: arr_data, ccode)
                        }
                    }
                    (nscalars, scalars_data, make_int_exp(127, kloc) :: tags_data, arr_data, ccode)
                }
                // the auxiliary arrays and the call are put into a separate code list
                // that becomes a single statement in the end
                val (_, sub_ccode) = decl_plain_arr(scalars_id, elem_ctyp, scalars_data.rev(), [], kloc)
                // the head of the reversed tag list is the 127 added after the last row;
                // it's replaced with -1
                val tags_data = (make_int_exp(-1, kloc) :: tags_data.tl()).rev()
                val (tags_exp, sub_ccode) = decl_plain_arr(gen_idc(cm_idx, "tags"), CTypSInt(8), tags_data, sub_ccode, kloc)
                val (arr_data_exp, sub_ccode) = decl_plain_arr(gen_idc(cm_idx, "parts"), std_CTypVoidPtr, arr_data.rev(), sub_ccode, kloc)
                val sizeof_elem_exp = make_call(std_sizeof, [:: CExpTyp(elem_ctyp, kloc) ], CTypSize_t, kloc)
                // element destructor and copy operator, or null pointers if there are none
                val free_f_exp =
                    match C_gen_types.get_free_f(elem_ctyp, true, false, kloc) {
                    | (_, Some free_f) => CExpCast(free_f, std_fx_free_t, kloc)
                    | _ => make_nullptr(kloc)
                    }
                val copy_f_exp =
                match C_gen_types.get_copy_f(elem_ctyp, true, false, kloc) {
                | (_, Some copy_f) => CExpCast(copy_f, std_fx_copy_t, kloc)
                | _ => make_nullptr(kloc)
                }
                val call_mkarr =
                make_call(
                    get_id("fx_compose_arr"),
                    [:: make_int_exp(dims, kloc), sizeof_elem_exp, free_f_exp,
                       copy_f_exp, tags_exp, arr_data_exp, cexp_get_addr(arr_exp)],
                    CTypCInt,
                    kloc)
                val sub_ccode = add_fx_call(call_mkarr, sub_ccode, kloc)
                val ccode = rccode2stmt(sub_ccode, kloc) :: ccode
                (false, arr_exp, ccode)
            } else {
                val (arr_exp, ccode) = get_dstexp(dstexp_r, "arr", ctyp, ccode, kloc)
                // the shape is {ncols} for a single row and {nrows, ncols} otherwise;
                // the number of columns is taken from the first row
                val nrows = arows.length()
                val ncols = arows.hd().length()
                val shape = if nrows > 1 { [:: nrows, ncols] } else { [:: ncols] }
                val shape = [:: for i <- shape { make_int_exp(i, kloc) } ]
                // all the elements, row by row, in reverse order
                val fold data = [], ccode = ccode for arow <- arows {
                    fold data = data, ccode = ccode for (_, a) <- arow {
                        val (e, ccode) = atom2cexp(a, ccode, kloc)
                        (e :: data, ccode)
                    }
                }
                if all_literals {
                    // the shape is stored in a local constant array (in a separate code list),
                    // the data is stored in a plain array added to `glob_data_ccode`
                    val dims = shape.length()
                    val shape_ctyp = CTypRawArray([::CTypConst], CTypInt)
                    val shape_arr = CExpInit(shape, (shape_ctyp, kloc))
                    val (shape_exp, ccode_) = create_cdefval(gen_idc(cm_idx, "shape"), shape_ctyp,
                                                default_tempval_flags(), "", Some(shape_arr), [], kloc)
                    val data_id = gen_idc(cm_idx, "data")
                    val (data_exp, glob_data_ccode_) = decl_plain_arr(data_id, elem_ctyp,
                                                                    data.rev(), glob_data_ccode, kloc)
                    // mark the data array as private
                    val data_cv = get_cval(data_id, kloc)
                    val data_flags = data_cv.cv_flags
                    set_idc_entry(data_id, CVal(data_cv.{cv_flags=data_flags.{val_flag_private=true}}))
                    glob_data_ccode = glob_data_ccode_
                    val (sizeof_elem_exp, free_f_exp, copy_f_exp) = get_elem_size_free_copy(elem_ctyp, kloc)
                    val call_mkarr = make_call( std_fx_make_arr, [:: make_int_exp(dims, kloc), shape_exp,
                                        sizeof_elem_exp, free_f_exp, copy_f_exp, data_exp,
                                        cexp_get_addr(arr_exp)], CTypCInt, kloc)

                    val ccode_ = add_fx_call(call_mkarr, ccode_, kloc)
                    (false, arr_exp, rccode2stmt(ccode_, kloc) :: ccode)
                } else {
                    val ccode = make_make_arr_call(arr_exp, shape, data.rev(), ccode, curr_block_label(kloc), kloc)
                    (false, arr_exp, ccode)
                }
            }
        | KExpMkVector (elems, _) =>
            // Vector construction. If some of the elements are "expanded" collections
            // (the flag in the (flag, atom) pair is set), the vector is composed with
            // fx_compose_vec(); otherwise make_make_vec_call() is used.
            val have_expanded = exists(for (f, _) <- elems {f})
            if have_expanded {
                val (vec_exp, ccode) = get_dstexp(dstexp_r, "vec", ctyp, ccode, kloc)
                val elem_ctyp =
                    match ctyp {
                    | CTypVector elem_ctyp => elem_ctyp
                    | _ => throw compile_err(kloc, "cgen: invalid output type of vector construction expression")
                    }
                // 'scalars' is a plain array for the scalar elements that are not identifiers
                // (and so their address cannot be taken directly)
                val scalars_id = gen_idc(cm_idx, "scalars")
                val scalars_exp = make_id_t_exp(scalars_id, make_ptr(elem_ctyp), kloc)
                // For each element one tag and one pointer are collected (in reverse order):
                //   tag 0 - scalar element;
                //   tag d - expanded d-dimensional array;
                //   tag 100 - expanded list; tag 110 - expanded vector.
                // Unlike the array case, there are no rows and no per-row markers.
                val (_, scalars_data, tags_data, vec_data, ccode) =
                    fold nscalars = 0, scalars_data = [], tags_data = [],
                        vec_data = [], ccode = ccode for (f, a) <- elems {
                        val (e, ccode) = atom2cexp(a, ccode, kloc)
                        if f {
                            // expanded collection: arrays and vectors are passed by address,
                            // lists are passed as is
                            val elem_ktyp = get_atom_ktyp(a, kloc)
                            val (tag, elem_ptr) =
                            match deref_ktyp(elem_ktyp, kloc) {
                            | KTypArray (d, _) => (d, cexp_get_addr(e))
                            | KTypList _ => (100, e)
                            | KTypVector _ => (110, cexp_get_addr(e))
                            | _ => throw compile_err(kloc, f"cgen: the expanded structure {atom2str(a)} is not an array, vector or list")
                            }
                            (nscalars, scalars_data, make_int_exp(tag, kloc) :: tags_data, elem_ptr :: vec_data, ccode)
                        } else {
                            // scalar element: take the address of the identifier or
                            // store the value into 'scalars' and use 'scalars + index'
                            val (nscalars, scalars_data, vec_data_elem) =
                            match e {
                            | CExpIdent _ => (nscalars, scalars_data, cexp_get_addr(e))
                            | _ =>
                                (nscalars + 1, e :: scalars_data,
                                CExpBinary(COpAdd, scalars_exp, make_int_exp(nscalars, kloc), (std_CTypVoidPtr, kloc)))
                            }
                            (nscalars, scalars_data, make_int_exp(0, kloc) :: tags_data, vec_data_elem :: vec_data, ccode)
                        }
                    }
                val (_, sub_ccode) = decl_plain_arr(scalars_id, elem_ctyp, scalars_data.rev(), [], kloc)
                // Append the terminating -1 after the tags of all the elements.
                // The head of `tags_data` is the tag of the last element (not a row marker
                // as in the array case), so it must be preserved: dropping it with tl()
                // would leave 'tags' one entry shorter than 'parts'.
                val tags_data = (make_int_exp(-1, kloc) :: tags_data).rev()
                val (tags_exp, sub_ccode) = decl_plain_arr(gen_idc(cm_idx, "tags"), CTypSInt(8), tags_data, sub_ccode, kloc)
                val (vec_data_exp, sub_ccode) = decl_plain_arr(gen_idc(cm_idx, "parts"), std_CTypVoidPtr, vec_data.rev(), sub_ccode, kloc)
                val sizeof_elem_exp = make_call(std_sizeof, [:: CExpTyp(elem_ctyp, kloc) ], CTypSize_t, kloc)
                // element destructor and copy operator, or null pointers if there are none
                val free_f_exp =
                    match C_gen_types.get_free_f(elem_ctyp, true, false, kloc) {
                    | (_, Some free_f) => CExpCast(free_f, std_fx_free_t, kloc)
                    | _ => make_nullptr(kloc)
                    }
                val copy_f_exp =
                match C_gen_types.get_copy_f(elem_ctyp, true, false, kloc) {
                | (_, Some copy_f) => CExpCast(copy_f, std_fx_copy_t, kloc)
                | _ => make_nullptr(kloc)
                }
                val call_mkvec =
                make_call(
                    get_id("fx_compose_vec"),
                    [:: sizeof_elem_exp, free_f_exp, copy_f_exp, tags_exp,
                     vec_data_exp, cexp_get_addr(vec_exp)],
                    CTypCInt,
                    kloc)
                // the auxiliary arrays and the call become a single statement
                val sub_ccode = add_fx_call(call_mkvec, sub_ccode, kloc)
                val ccode = rccode2stmt(sub_ccode, kloc) :: ccode
                (false, vec_exp, ccode)
            } else {
                // plain case: all the elements (collected in reverse order) are scalars
                val fold data = [], ccode = ccode for (_, a) <- elems {
                    val (e, ccode) = atom2cexp(a, ccode, kloc)
                    (e :: data, ccode)
                }
                val (vec_exp, ccode) = get_dstexp(dstexp_r, "vec", ctyp, ccode, kloc)
                val ccode = make_make_vec_call(vec_exp, data.rev(), ccode, curr_block_label(kloc), kloc)
                (false, vec_exp, ccode)
            }
        | KExpAt (arr, border, interp, idxs, _) =>
            /*
                there are 2 major cases:
                1. some of the idxs are ranges. Then the result is array/string
                2. all the indices are scalars. Then the result is array element/character

                1. In the first case need to call a special function
                    `FX_CALL(fx_subarr(arr, ranges, subarr));`.
                    where ranges is array, concatenation of the following groups:
                    (0, idx) scalar indices
                    (1, a, b, delta) closed ranges [:: a:b:delta]
                        (if a was missing, it's set to 0, if delta was missing, it's set to 1)
                    (2, a, delta) open ranges [:: a::delta]
                        (if delta was missing, it's set to 1)
                1.1. one special case is a single [:] range, which means flattening operation.
                    just call fx_flatten_arr().

                2. In the second case need first to process each index idx_k (k=0..ndims-1):
                   2.1. If idx_k is "fast index" - great, just use the expression for the index
                        (process it via `atom2cexp`)
                   2.2. Otherwise we need to use the index more than once, so we need to
                        store it to temporary variable (unless it's already an identifier or constant)
                        and add it to the check
                        `FX_CHKIDX(FX_CHKIDX1(arr, k1, idx_k1) && FX_CHKIDX1(arr, k2, idx_k2) ..., catch_label);`
                    if all the indices are fast indices, the whole check is excluded, of course.
                    then we return `(true, FX_PTR_{ndims}D(elem_ctyp, arr, idx0, ..., idx{ndims-1}), ccode)`
            */
            if interp != InterpNone {
                throw compile_err(kloc, "cgen: inter-element interpolation is not supported yet")
            }
            val (arr_exp, ccode) = atom2cexp_(arr, false, ccode, kloc)
            val lbl = curr_block_label(kloc)
            val arr_ctyp = get_cexp_typ(arr_exp)
            match arr_ctyp {
            | CTypString =>
                match idxs {
                | [:: DomainFast(i)] =>
                    val (i_exp, ccode) = atom2cexp(i, ccode, kloc)
                    val get_elem_exp = make_call(std_FX_STR_ELEM, [:: arr_exp, i_exp ], CTypUniChar, kloc)
                    (true, get_elem_exp, ccode)
                | [:: DomainElem(i)] =>
                    val (i_exp, ccode) = atom2cexp_(i, true, ccode, kloc)
                    val (get_elem_exp, ccode) =
                    match border {
                    | BorderNone =>
                        val chk_exp = make_call(std_FX_STR_CHKIDX, [:: arr_exp, i_exp, lbl ], CTypVoid, kloc)
                        (make_call(std_FX_STR_ELEM, [:: arr_exp, i_exp ], CTypUniChar, kloc), CExp(chk_exp) :: ccode)
                    | BorderClip => (make_call(std_FX_STR_ELEM_CLIP, [:: arr_exp, i_exp ], CTypUniChar, kloc), ccode)
                    | BorderWrap => (make_call(std_FX_STR_ELEM_WRAP, [:: arr_exp, i_exp ], CTypUniChar, kloc), ccode)
                    | BorderZero => (make_call(std_FX_STR_ELEM_ZERO, [:: arr_exp, i_exp ], CTypUniChar, kloc), ccode)
                    }
                    (true, get_elem_exp, ccode)
                | [:: DomainRange (a, b, delta)] =>
                    if border != BorderNone {
                        throw compile_err(kloc, "cgen: border extrapolation with ranges is not supported yet")
                    }
                    val (mask, (a_exp, ccode)) =
                        match a {
                        | AtomLit(KLitNil _) => (1, (make_int_exp(0, kloc), ccode))
                        | _ => (0, atom2cexp(a, ccode, kloc))
                        }
                    val (mask, (b_exp, ccode)) =
                        match b {
                        | AtomLit(KLitNil _) => (2 + mask, (make_int_exp(0, kloc), ccode))
                        | _ => (mask, atom2cexp(b, ccode, kloc))
                        }
                    val (delta_exp, ccode) = atom2cexp(delta, ccode, kloc)
                    val (substr_exp, ccode) = get_dstexp(dstexp_r, "substr", ctyp, ccode, kloc)
                    val call_substr = make_call(std_fx_substr,
                        [:: cexp_get_addr(arr_exp), a_exp, b_exp, delta_exp, make_int_exp(mask, kloc), cexp_get_addr(substr_exp) ],
                        CTypString, kloc)
                    (false, substr_exp, add_fx_call(call_substr, ccode, kloc))
                | _ => throw compile_err(kloc, "cgen: unexpected index type when accessing string (should be a single scalar index or range)")
                }
            | CTypVector _ =>
                match idxs {
                | [:: DomainFast(i)] =>
                    val (i_exp, ccode) = atom2cexp(i, ccode, kloc)
                    val get_elem_exp = make_call(get_id("FX_RRB_ELEM"), [:: CExpTyp(ctyp, kloc), arr_exp, i_exp ], ctyp, kloc)
                    (true, get_elem_exp, ccode)
                | [:: DomainElem(i)] =>
                    val (i_exp, ccode) = atom2cexp_(i, true, ccode, kloc)
                    val (get_elem_exp, ccode) =
                    match border {
                    | BorderNone =>
                        val chk_exp = make_call(get_id("FX_RRB_CHKIDX"), [:: arr_exp, i_exp, lbl ], CTypVoid, kloc)
                        val get_elem_exp = make_call(get_id("FX_RRB_ELEM"), [:: CExpTyp(ctyp, kloc), arr_exp, i_exp ], ctyp, kloc)
                        (get_elem_exp, CExp(chk_exp) :: ccode)
                    | BorderClip =>
                        val get_elem_exp = make_call(get_id("FX_RRB_ELEM_CLIP"), [:: CExpTyp(ctyp, kloc), arr_exp, i_exp ], ctyp, kloc)
                        (get_elem_exp, ccode)
                    | BorderWrap =>
                        val get_elem_exp = make_call(get_id("FX_RRB_ELEM_WRAP"), [:: CExpTyp(ctyp, kloc), arr_exp, i_exp ], ctyp, kloc)
                        (get_elem_exp, ccode)
                    | BorderZero =>
                        val get_elem_exp = make_call(get_id("FX_RRB_ELEM_ZERO"), [:: CExpTyp(ctyp, kloc), arr_exp, i_exp ], ctyp, kloc)
                        (get_elem_exp, ccode)
                    }
                    (true, get_elem_exp, ccode)
                | [:: DomainRange (a, b, delta)] =>
                    if border != BorderNone {
                        throw compile_err(kloc, "cgen: border extrapolation with ranges is not supported yet")
                    }
                    val (mask, (a_exp, ccode)) =
                        match a {
                        | AtomLit(KLitNil _) => (1, (make_int_exp(0, kloc), ccode))
                        | _ => (0, atom2cexp(a, ccode, kloc))
                        }
                    val (mask, (b_exp, ccode)) =
                        match b {
                        | AtomLit(KLitNil _) => (2 + mask, (make_int_exp(0, kloc), ccode))
                        | _ => (mask, atom2cexp(b, ccode, kloc))
                        }
                    val delta = match delta {
                    | AtomLit(KLitNil _) | AtomLit(KLitInt 1i64) => 1
                    | AtomLit(KLitInt(-1i64)) => -1
                    | _ =>
                        throw compile_err(kloc, "cgen: vector slicing only supports stride == ±1")
                    }
                    val (slice_exp, ccode) = get_dstexp(dstexp_r, "slice", ctyp, ccode, kloc)
                    val call_slice = make_call(get_id("fx_rrb_slice"),
                        [:: cexp_get_addr(arr_exp), a_exp, b_exp, make_int_exp(delta, kloc),
                         make_int_exp(mask, kloc), cexp_get_addr(slice_exp)],
                        ctyp, kloc)
                    (false, slice_exp, add_fx_call(call_slice, ccode, kloc))
                | _ => throw compile_err(kloc, "cgen: unexpected index type when accessing vector (should be a single scalar index or range)")
                }
            | CTypArray _ =>
                val need_subarr = exists(for d <- idxs { | DomainRange _ => true | _ => false })
                val need_flatten = need_subarr &&
                    (match idxs {
                    | [:: DomainRange(AtomLit(KLitNil _), AtomLit(KLitNil _), AtomLit(KLitNil _))]
                    | [:: DomainRange(AtomLit(KLitNil _), AtomLit(KLitNil _), AtomLit(KLitInt 1i64))] => true
                    | _ => false
                    })
                if need_flatten {
                    // Flattening (a single [:] range): just call fx_flatten_arr().
                    // The code list returned by get_dstexp() must be kept: when there is
                    // no destination yet, get_dstexp() creates a temporary and may add
                    // its definition to the code, as in all the other branches.
                    val (subarr_exp, ccode) = get_dstexp(dstexp_r, "arr", ctyp, ccode, kloc)
                    val call_flatten = make_call(get_id("fx_flatten_arr"),
                        [:: cexp_get_addr(arr_exp), cexp_get_addr(subarr_exp) ], CTypCInt, kloc)
                    (false, subarr_exp, add_fx_call(call_flatten, ccode, kloc))
                } else if need_subarr {
                    if border != BorderNone {
                        throw compile_err(kloc, "cgen: border extrapolation with ranges is not supported yet")
                    }
                    val fold range_data = [], ccode = ccode for d <- idxs {
                        match d {
                        | DomainElem i =>
                            val (i_exp, ccode) = atom2cexp(i, ccode, kloc)
                            (i_exp :: make_int_exp(0, kloc) :: range_data, ccode)
                        | DomainFast i =>
                            val (i_exp, ccode) = atom2cexp(i, ccode, kloc)
                            (i_exp :: make_int_exp(0, kloc) :: range_data, ccode)
                        | DomainRange (a, b, delta) =>
                            val (a_exp, ccode) =
                                match a {
                                | AtomLit(KLitNil _) => (make_int_exp(0, kloc), ccode)
                                | _ => atom2cexp(a, ccode, kloc)
                                }
                            val (range_delta, ccode) =
                                match b {
                                | AtomLit(KLitNil _) => ([:: a_exp, make_int_exp(2, kloc)], ccode)
                                | _ => val (b_exp, ccode) = atom2cexp(b, ccode, kloc)
                                    ([:: b_exp, a_exp, make_int_exp(1, kloc)], ccode)
                                }
                            val (d_exp, ccode) = atom2cexp(delta, ccode, kloc)
                            ((d_exp :: range_delta) + range_data, ccode)
                        }
                    }
                    val (subarr_exp, ccode) = get_dstexp(dstexp_r, "arr", ctyp, ccode, kloc)
                    val rdata_ctyp = CTypRawArray([::CTypConst], CTypInt)
                    val rdata_arr = CExpInit(range_data.rev(), (rdata_ctyp, kloc))
                    val (rdata_exp, sub_ccode) =
                        create_cdefval(gen_idc(cm_idx, "ranges"), rdata_ctyp,
                                    default_tempval_flags(), "", Some(rdata_arr), [], kloc)
                    val call_subarr = make_call(std_fx_subarr, [:: cexp_get_addr(arr_exp), rdata_exp, cexp_get_addr(subarr_exp) ], CTypInt, kloc)
                    val sub_ccode = add_fx_call(call_subarr, sub_ccode, kloc)
                    val ccode = rccode2stmt(sub_ccode, kloc) :: ccode
                    (false, subarr_exp, ccode)
                } else {
                    val elem_ctyp = ctyp
                    val fold chk_exp_opt = (None: cexp_t?), i_exps = [],
                            ccode = ccode for d@dim <- idxs {
                        val d = if border == BorderNone { d }
                                else { match d { | DomainElem i => DomainFast(i) | _ => d } }
                        match d {
                        | DomainFast i =>
                            val (i_exp, ccode) = atom2cexp(i, ccode, kloc)
                            (chk_exp_opt, i_exp :: i_exps, ccode)
                        | DomainElem i =>
                            val (i_exp, ccode) = atom2cexp_(i, true, ccode, kloc)
                            val chk_exp1 = make_call(std_FX_CHKIDX1, [:: arr_exp, make_int_exp(dim, kloc), i_exp ], CTypBool, kloc)
                            val chk_exp_opt =
                            match chk_exp_opt {
                            | Some chk_exp =>
                                val chk_exp = CExpBinary(COpLogicAnd, chk_exp, chk_exp1, (CTypBool, kloc))
                                Some(chk_exp)
                            | _ => Some(chk_exp1)
                            }
                            (chk_exp_opt, i_exp :: i_exps, ccode)
                        | _ => throw compile_err(kloc, "cgen: unexpected index type")
                        }
                    }
                    val ccode =
                        match chk_exp_opt {
                        | Some chk_exp =>
                            val call_chkidx = make_call(std_FX_CHKIDX, [:: chk_exp, lbl ], CTypVoid, kloc)
                            CExp(call_chkidx) :: ccode
                        | _ => ccode
                        }
                    val ndims = idxs.length()
                    val access_op =
                        match border {
                        | BorderNone => std_FX_PTR_xD
                        | BorderClip => std_FX_PTR_xD_CLIP
                        | BorderWrap => std_FX_PTR_xD_WRAP
                        | BorderZero => std_FX_PTR_xD_ZERO
                        }
                    val get_elem_exp = make_call(access_op.nth(ndims - 1), CExpTyp(elem_ctyp, kloc) ::
                                                 arr_exp :: i_exps.rev(), make_ptr(elem_ctyp), kloc)
                    (true, cexp_deref(get_elem_exp), ccode)
                }
            | _ =>
                throw compile_err(kloc,
                    "cgen: unknown/unsupported type of the container, it should be CTypArray _ or CTypVector _ or CTypString")
            }
        | KExpMem (a1, n, _) =>
            val (ce1, ccode) = id2cexp(a1, false, ccode, kloc)
            val (_, ce1, relems, ofs) = get_struct(ce1)
            val nelems = relems.length()
            if n < 0 || n + ofs >= nelems {
                throw compile_err(kloc, f"cgen: the tuple/record element index {n} is out of range [0, {nelems}]")
            }
            val (n_id, _) = relems.nth(n + ofs)
            (true, cexp_mem(ce1, n_id, ctyp), ccode)
        | KExpAssign (i, a, _) =>
            val ktyp = get_idk_ktyp(i, kloc)
            val {ktp_complex} = K_annotate.get_ktprops(ktyp, kloc)
            val ccode =
            if ktp_complex {
                val (i_exp, ccode) = id2cexp(i, true, ccode, kloc)
                val (e_exp, ccode) = atom2cexp_(a, true, ccode, kloc)
                val ctyp = get_cexp_typ(i_exp)
                if is_subarray(i, kloc) {
                    val copy_arr_data =
                    make_call(std_fx_copy_arr_data, [:: cexp_get_addr(e_exp), cexp_get_addr(i_exp), make_bool_exp(true, kloc) ], CTypInt, kloc)
                    add_fx_call(copy_arr_data, ccode, kloc)
                } else {
                    val ccode = C_gen_types.gen_free_code(i_exp, ctyp, true, false, ccode, kloc)
                    C_gen_types.gen_copy_code(e_exp, i_exp, ctyp, ccode, kloc)
                }
            } else {
                val (i_exp, ccode) = id2cexp(i, false, ccode, kloc)
                val (a_exp, ccode) = atom2cexp(a, ccode, kloc)
                C_gen_types.gen_copy_code(a_exp, i_exp, ctyp, ccode, kloc)
            }
            (false, dummy_exp, ccode)
        | KExpMatch (cases, _) =>
            /*
            code00; if(exp00) {
                code01; if(exp01) {
                ... if(exp0N0) {
                    action0; goto _fx_endmatch...;
            }...}}
            code10; if(exp10) {
                code11; if(exp11) {
                ... if(exp1N1) {
                    action1; goto _fx_endmatch...;
            }...}}
            ...
            #if <have_default_action>
            default_action; // if there is default case
            #else
            FX_THROW_FAST(FX_EXN_NoMatchError, parent_label);
            #endif
            [_fx_endmatch...:]
            ...
            */
            val (dst_exp, ccode) = get_dstexp(dstexp_r, "res", ctyp, ccode, kloc)
            val ccode = process_cases(cases, dstexp_r, ccode, false, kloc)
            (false, dst_exp, ccode)
        | KExpTryCatch (try_e, catch_e, _) =>
            /*
            [dst_exp =] try_block
            if(fx_status < 0) {
                fx_status = 0;
                [destruct dst_exp] // because it may be partially constructed at this point,
                                   // but we are going to re-use it
                catch_code
            }
            */
            val (pop_exn, catch_e) = match catch_e {
                | KExpSeq((KDefVal(_, KExpIntrin(IntrinPopExn, _, _), _) as pop_exn) :: catch_e_seq, _) =>
                    (pop_exn, code2kexp(catch_e_seq, kloc))
                | KExpSeq((KExpIntrin(IntrinPopExn, _, _) as pop_exn) :: catch_e_seq, _) =>
                    (pop_exn, code2kexp(catch_e_seq, kloc))
                | _ =>
                    throw compile_err(kloc, "catch part in KExpTryCatch() should be a sequence starting with 'val x = pop_exn()'")
                }
            val (dst_exp, ccode) = get_dstexp(dstexp_r, "res", ctyp, ccode, kloc)
            val try_loc = get_kexp_loc(try_e)
            val try_end_loc = get_end_loc(try_loc)
            new_block_ctx(BlockKind_Try, try_loc)
            val (_, try_ccode) = kexp2cexp(try_e, dstexp_r, [])
            val bctx_try = curr_block_ctx(kloc)
            val {bctx_prologue, bctx_cleanup, bctx_label, bctx_label_used} = *bctx_try
            val epilogue = if bctx_label_used == 0 { bctx_cleanup }
                           else { bctx_cleanup + [:: CStmtLabel(bctx_label, try_end_loc)] }
            val ccode = epilogue + (try_ccode + (bctx_prologue + ccode))
            pop_block_ctx(try_end_loc)
            val fx_status_exp = make_fx_status(try_end_loc)
            val (_, catch_ccode) = kexp2cexp(pop_exn, ref None, [])
            val catch_ccode = CExp(CExpBinary(COpAssign, fx_status_exp, make_int_exp(0, try_end_loc), (CTypVoid, try_end_loc))) :: catch_ccode
            val catch_ccode =
                match ctyp {
                | CTypVoid => catch_ccode
                | _ => C_gen_types.gen_free_code(dst_exp, ctyp, true, true, catch_ccode, try_end_loc)
                }
            val (_, catch_ccode) = kexp2cexp(catch_e, dstexp_r, catch_ccode)
            val check_neg_status = CExpBinary(COpCmp(CmpLT), make_fx_status(try_end_loc), make_int_exp(0, try_end_loc), (CTypBool, try_end_loc))
            val catch_loc = get_kexp_loc(catch_e)
            val catch_clause = make_if(check_neg_status, rccode2stmt(catch_ccode, catch_loc), CStmtNop(try_end_loc), catch_loc)
            (false, dst_exp, catch_clause :: ccode)
        | KExpThrow (i, rethrow, _) =>
            val lbl = curr_block_label(kloc)
            val ccode =
            if rethrow {
                val (i_exp, ccode) = id2cexp(i, false, ccode, kloc)
                val throw_exp = make_call(std_FX_RETHROW, [:: cexp_get_addr(i_exp), lbl ], CTypVoid, kloc)
                CExp(throw_exp) :: ccode
            } else {
                match builtin_exceptions.find_opt(i) {
                | Some _ =>
                    val i = get_id("FX_EXN_" + pp(i))
                    val i_exp = make_id_t_exp(i, CTypCInt, kloc)
                    val throw_exp = make_call(std_FX_FAST_THROW, [:: i_exp, lbl ], CTypVoid, kloc)
                    CExp(throw_exp) :: ccode
                | _ =>
                    val move_f =
                        match kinfo_(i, kloc) {
                        | KExn _ => false
                        | KVal ({kv_typ=KTypExn}) => u1vals.mem(i)
                        | _ => throw compile_err(kloc, "cgen: throw is applied to neither exception nor value of 'exn' type")
                        }
                    val (i_exp, ccode) = id2cexp(i, move_f, ccode, kloc)
                    val throw_exp = make_call(std_FX_THROW, [:: cexp_get_addr(i_exp), make_bool_exp(move_f, kloc), lbl ], CTypVoid, kloc)
                    CExp(throw_exp) :: ccode
                }
            }
            (false, dummy_exp, ccode)
        | KExpCast (a1, kt, _) =>
            val (ce1, ccode) = atom2cexp(a1, ccode, kloc)
            val atyp = get_atom_ktyp(a1, kloc)
            val ctyp = C_gen_types.ktyp2ctyp(kt, kloc)
            (true, (match (atyp, ctyp) {
            | (KTypFloat(16), CTypFloat(32)) =>
                make_call(get_id("FX_FLOAT"), [:: ce1], CTypFloat(32), kloc)
            | (KTypFloat(16), _) =>
                CExpCast(make_call(get_id("FX_FLOAT"), [:: ce1], CTypFloat(32), kloc), ctyp, kloc)
            | (_, CTypFloat(16)) =>
                make_call(get_id("FX_FLOAT16"), [:: ce1], CTypFloat(16), kloc)
            | _ => CExpCast(ce1, ctyp, kloc)
            }), ccode)
        | KExpMap (e_idoml_l, body, flags, _) =>
            /*
                1. generate output collection (`arr/list_first = get_dstexp...`).
                   in the case of list also declare `_fx_L... list_last=0;`
                2. make list of expressions before each for in the comprehension, starting from the outer loop:
                    [:: pre_e1, pre_e2, ..., pre_body]
                   at once compute the output dimensionality
                3. define recursive function:
                    run process_for
                    if it's the inner-most loop and if we make array,
                        check the size of array if it is already created,
                        otherwise create the array.
                    if we make array, but there are just lists
                    form nested scope
                    form nested_ccode (see KExpFor, how body_ccode is formed)
                    if there are no more nested fors,
                        call `result=get_dstexp...` to store the body expression result.
                        gen ccode for the body.
                        if we make array,
                            check that there are no break or continue statement inside for.
                            form `*dstptr++ = result;` expression
                        otherwise (we make list)
                            create list cell (no need to put it to prologue, because we are not going to release it)
                            _fx_L... list_cell;
                            FX_CALL(_fx_make_L...(result, 0, &list_cell), end_loop);
                            FX_LIST_APPEND(list_first, list_last, list_cell);
                        finalize loop, make [:: CExp (CExpFor(...))]
                    else
                        call function recursively, get for-loop with attached parts.
                    return post_ccode :: (formed_for_loop @ init_ccode)
                4. pass there everything, append the result to ccode.
                5. return (false, arr/list_exp, ccode)
            */
            val map_lbl = curr_block_label(kloc)
            val for_loc = get_start_loc(kloc)
            val end_for_loc = get_end_loc(kloc)
            val for_flag_make = flags.for_flag_make
            val need_make_array = for_flag_make == ForMakeArray
            val unzip_mode = flags.for_flag_unzip
            val nfors = e_idoml_l.length()
            /* collect all the variables/values declared inside 'for' (including the iteration variables) */
            val pre_alloc_array =
            if !need_make_array {
                false
            } else {
                val decl_inside_for = empty_id_hashset(256)
                all(for (e, idoml, idxl) <- e_idoml_l {
                        decl_inside_for.union(declared(e::[], 256))
                        for i <- idxl {
                            decl_inside_for.add(i)
                        }
                        all(for (i, dom) <- idoml {
                            decl_inside_for.add(i)
                            match dom {
                            | DomainElem(AtomId col) =>
                                val {kv_typ, kv_flags} = get_kval(col, kloc)
                                !kv_flags.val_flag_mutable &&
                                (match kv_typ { | KTypArray _ | KTypString => true | _ => false }) &&
                                !decl_inside_for.mem(col)
                            | DomainElem(AtomLit(KLitString _)) => true
                            | DomainRange (a, b, delta) =>
                                /* Checks one component (start, end or step) of a range domain.
                                   Any atom that is not an identifier passes; an identifier passes only if
                                   it is immutable and is not among the ids collected in decl_inside_for. */
                                fun check_range_elem(abd: atom_t) =
                                    match abd {
                                    | AtomId bound_id =>
                                        !(is_mutable(bound_id, kloc) || decl_inside_for.mem(bound_id))
                                    | _ => true
                                    }
                                check_range_elem(a) && check_range_elem(b) && check_range_elem(delta)
                            | _ => false
                            }
                        })
                    })
            }
            /* compute the total array dimensionality */
            val fold ndims = 0 for (e, idoml, _)@for_idx <- e_idoml_l {
                val ndims_i = compute_for_ndims(for_idx, nfors, idoml, for_loc)
                ndims + ndims_i
            }
            val is_parallel_map = pre_alloc_array && flags.for_flag_parallel
            val glob_status = make_id_t_exp(fx_status_, CTypCInt, kloc)
            val (par_status, ccode, nested_status, decl_nested_status) =
            if is_parallel_map {
                val (par_status, ccode) =
                create_cdefval(gen_idc(cm_idx, "par_status"), CTypCInt, default_tempvar_flags(), "", Some(make_int_exp(0, kloc)), ccode, kloc)
                val (nested_status, decl_nested_status) =
                create_cdefval(gen_idc(cm_idx, "status"), CTypCInt, default_tempvar_flags(), "fx_status", Some(make_int_exp(0, kloc)), [], kloc)
                (par_status, ccode, nested_status, decl_nested_status)
            } else {
                (make_dummy_exp(kloc), ccode, glob_status, [])
            }
            /* declare the output array/list; in the case of array also declare the output pointer;
               in the case of list also declare pointer to the last list element */
            val coll_typs =
                match (unzip_mode, deref_ktyp(ktyp, for_loc)) {
                | (false, t) => [:: ktyp]
                | (true, KTypTuple tl) => tl
                | (_, _) => throw compile_err(kloc, "cgen: the result of @unzip comprehension should be a tuple")
                }
            val fold dst_data = [], ccode = ccode, finalize_ccode = [] for coll_typ <- coll_typs {
                val coll_ctyp = C_gen_types.ktyp2ctyp(coll_typ, kloc)
                match (for_flag_make, coll_ctyp, deref_ktyp(coll_typ, kloc)) {
                | (ForMakeArray, CTypArray (nd, elemtyp), KTypArray _) =>
                    val (dst_exp, ccode) =
                        if unzip_mode {
                            add_local(gen_idc(cm_idx, "arr"), coll_ctyp, default_tempval_flags(), None, ccode, for_loc)
                        } else {
                            get_dstexp(dstexp_r, "arr", coll_ctyp, ccode, for_loc)
                        }
                    val (dst_ptr, ccode) =
                    if is_parallel_map {
                        (make_dummy_exp(kloc), ccode)
                    } else {
                        create_cdefval( gen_idc(cm_idx, "dstptr"), make_ptr(elemtyp),
                                        default_tempvar_flags(), "",
                                        Some(make_nullptr(for_loc)),
                                        ccode, for_loc)
                    }
                    if nd != ndims {
                        throw compile_err(kloc, f"cgen: invalid dimensionaly of array comprehension result (computed: {ndims}, expected: {nd})")
                    } else {
                        ((coll_ctyp, elemtyp, dst_exp, dst_ptr, make_dummy_exp(for_loc)) :: dst_data, ccode, finalize_ccode)
                    }
                | (ForMakeVector, CTypVector (elemtyp), KTypVector _) =>
                    val (dst_exp, ccode) =
                        if unzip_mode {
                            add_local(gen_idc(cm_idx, "vec"), coll_ctyp, default_tempval_flags(), None, ccode, for_loc)
                        } else {
                            get_dstexp(dstexp_r, "vec", coll_ctyp, ccode, for_loc)
                        }
                    val (sizeof_elem_exp, free_f_exp, copy_f_exp) = get_elem_size_free_copy(elemtyp, for_loc)
                    val iter_t = CTypName(get_id("fx_rrbiter_t"))
                    val (iter_exp, ccode) = create_cdefval( gen_idc(cm_idx, "iter"), iter_t,
                                                default_tempvar_flags(), "", None, ccode, for_loc)
                    val call_start_write = make_call(get_id("FX_RRB_START_WRITE"),
                                                [:: CExpTyp(elemtyp, for_loc), sizeof_elem_exp, free_f_exp, copy_f_exp,
                                                dst_exp, iter_exp],
                                                std_CTypVoidPtr, for_loc)
                    val (dst_ptr, ccode) = create_cdefval( gen_idc(cm_idx, "dstptr"), make_ptr(elemtyp),
                                                default_tempvar_flags(), "", Some(call_start_write), ccode, for_loc)
                    val call_end_write = make_call(get_id("FX_RRB_END_WRITE"), [:: iter_exp, dst_ptr ],
                                                CTypVoid, for_loc)
                    ((coll_ctyp, elemtyp, dst_exp, dst_ptr, iter_exp) :: dst_data, ccode, CExp(call_end_write) :: finalize_ccode)
                | (ForMakeList, _, KTypList kelemtyp) =>
                    val elemtyp = C_gen_types.ktyp2ctyp(kelemtyp, for_loc)
                    val (dst_exp, ccode) =
                        if unzip_mode {
                            add_local(gen_idc(cm_idx, "lst"), coll_ctyp, default_tempvar_flags(), None, ccode, for_loc)
                        } else {
                            get_dstexp(dstexp_r, "lst", coll_ctyp, ccode, for_loc)
                        }
                    val (lst_end, ccode) =
                    create_cdefval(gen_idc(cm_idx, "lstend"), coll_ctyp, default_tempvar_flags(), "", Some(make_nullptr(for_loc)), ccode, for_loc)
                    ((coll_ctyp, elemtyp, dst_exp, make_dummy_exp(for_loc), lst_end) :: dst_data, ccode, finalize_ccode)
                | _ =>
                    val maptype_str = match for_flag_make {
                        | ForMakeArray => "make_array"
                        | ForMakeList => "make_list"
                        | ForMakeVector => "make_vector"
                        | _ => "???"
                        }
                    throw compile_err( kloc,
                        f"cgen: invalid combination of comprehension type '{maptype_str}' and the output collection type '{coll_typ}'")
                }
            }
            val dst_data = dst_data.rev()
            /* form the nested for statement */
            fun form_map(pre_map_ccode: ccode_t, for_idx: int,
                         e_idoml_l: (kexp_t, (id_t, dom_t) list, id_t list) list,
                         prev_i_exps: cexp_t list, prev_n_exps: cexp_t list)
            {
                val (init_kexp, idoml, at_ids, nested_e_idoml) =
                    match e_idoml_l {
                    | (e, idoml, at_ids) :: rest => (e, idoml, at_ids, rest)
                    | _ => throw compile_err(for_loc, "cgen: empty e_idoml_l in KExpMap")
                    }
                val dims_ofs = prev_n_exps.length()
                val nested_loc = get_kexp_loc(init_kexp)
                val nested_loc = if nested_loc == noloc { for_loc } else { nested_loc }
                val (_, init_ccode) = kexp2cexp(init_kexp, ref None, [])
                val lbl = curr_block_label(nested_loc)
                val (for_headers, list_exps, i_exps, n_exps, pre_map_ccode_delta, pre_body_ccode, body_elems, post_ccode) =
                    process_for(lbl, idoml, at_ids, for_idx, nfors, ndims, dims_ofs, nested_e_idoml, [], nested_loc)
                val (ndims_i, n_exps, lst_len_ccode) =
                    match (list_exps, n_exps, need_make_array) {
                    | (_, n_exp :: _, _) => (n_exps.length(), n_exps, [])
                    | (l_exp :: _, [], true) =>
                        /* if there is no fixed range or array iterated, just list(s),
                            and we need to make array, we need to compute length of the list. */
                        val call_list_len = make_call(std_fx_list_length, [:: l_exp], CTypInt, nested_loc)
                        val (lstlen, lst_len_ccode) =
                        create_cdefval(gen_idc(cm_idx, "len"), CTypInt, default_tempval_flags(), "", Some(call_list_len), [], nested_loc)
                        (1, [:: lstlen], lst_len_ccode)
                    | ([], [], true) =>
                        throw compile_err(nested_loc, for_err_msg(for_idx, nfors, -1, "array comprehension uses 'for' with indefinite range"))
                    | _ => (1, n_exps, [])
                    }
                val n_exps = prev_n_exps + n_exps
                val i_exps = prev_i_exps + i_exps
                val (_, _, dst_exp0, _, _) = dst_data.hd()
                val alloc_array_ccode =
                    if !need_make_array || dims_ofs + ndims_i < ndims { [] }
                    else {
                        val (_, cmp_size_list) =
                        fold k = 0, cmp_size_list = [] for n_exp <- n_exps {
                            val size_i = make_call(std_FX_ARR_SIZE, [:: dst_exp0, make_int_exp(k, nested_loc) ], CTypInt, nested_loc)
                            val cmp_size_i = CExpBinary(COpCmp(CmpEQ), size_i, n_exp, (CTypBool, nested_loc))
                            (k + 1, cmp_size_i :: cmp_size_list)
                        }
                        val lbl = if pre_alloc_array { map_lbl }
                                  else { curr_block_label(nested_loc) }
                        val fold then_ccode = [] for (coll_ctyp, elemtyp, dst_exp, dst_ptr, _) <- dst_data {
                            val then_ccode = make_make_arr_call(dst_exp, n_exps, [], then_ccode, lbl, nested_loc)
                            if is_parallel_map {
                                then_ccode
                            } else {
                                val arr_data = CExpCast(cexp_mem(dst_exp, get_id("data"), std_CTypVoidPtr), make_ptr(elemtyp), nested_loc)
                                val set_dstptr = make_assign(dst_ptr, arr_data)
                                CExp(set_dstptr) :: then_ccode
                            }
                        }
                        if for_idx == 0 || pre_alloc_array {
                            then_ccode
                        } else {
                            val cc_exp = cexp_mem(dst_exp0, get_id("data"), std_CTypVoidPtr)
                            val cc_exp = CExpUnary(COpLogicNot, cc_exp, (CTypBool, nested_loc))
                            val else_ccode = add_size_eq_check(cmp_size_list, [], lbl, nested_loc)
                            val check_or_create = make_if(cc_exp, rccode2stmt(then_ccode, nested_loc), rccode2stmt(else_ccode, nested_loc), nested_loc)
                            [:: check_or_create]
                        }
                    }
                val (pre_map_ccode, init_ccode) =
                    if pre_alloc_array {
                        (alloc_array_ccode + pre_map_ccode_delta + pre_map_ccode, lst_len_ccode + init_ccode)
                    } else {
                        (pre_map_ccode, alloc_array_ccode + lst_len_ccode + pre_map_ccode_delta + init_ccode)
                    }
                val for_flags = default_for_flags().{for_flag_nested=for_idx > 0, for_flag_parallel=is_parallel_map && for_idx <= 0}
                new_for_block_ctx(ndims, for_flags, nested_status, par_status, kloc)
                /* inside the loop body context form `<etyp> v=e` expressions (or more complex ones in the case of complex types) */
                val body_ccode = decl_for_body_elems(body_elems, [])
                val (add_incr_dstptr, dst_data, pre_body_ccode, (pre_map_ccode, body_ccode)) =
                match nested_e_idoml {
                | [:: (body, [], [])] =>
                    /* add the loop body itself */
                    val body_loc = get_kexp_loc(body)
                    val (add_incr_dstptr, dst_data, pre_body_ccode, body_ccode) =
                    if !is_parallel_map {
                        (true, dst_data, pre_body_ccode, body_ccode)
                    } else {
                        val n_i_exps = i_exps.length()
                        if !(n_i_exps == n_exps.length() && n_i_exps == ndims) {
                            throw compile_err( body_loc,
                                f"cgen: internal error when compiling parallel for: incorrect number of iteration indices (={n_i_exps})."+
                                f"There should be as many as the output array dimensionality (={ndims})")
                        }
                        val fold dst_data = [], decl_dstptr_ccode_all = [] for (coll_ctyp, elemtyp, dst_exp, dst_ptr, iter) <- dst_data {
                            val elemtyp_ptr = make_ptr(elemtyp)
                            val dst_idxs = if ndims == 1 { i_exps }
                                           else { (make_int_exp(0, body_loc) :: i_exps.rev().tl()).rev() }
                            val get_arr_slice = make_call(std_FX_PTR_xD.nth(ndims - 1), CExpTyp(elemtyp, body_loc) ::
                                                          dst_exp :: dst_idxs, elemtyp_ptr, body_loc)
                            val (dst_ptr, decl_dstptr_ccode) = create_cdefval(gen_idc(cm_idx, "dstptr"), elemtyp_ptr,
                                                                              default_tempvar_flags(), "",
                                                                              Some(get_arr_slice), [], body_loc)
                            ((coll_ctyp, elemtyp, dst_exp, dst_ptr, iter) :: dst_data,
                            decl_dstptr_ccode + decl_dstptr_ccode_all)
                        }
                        if ndims == 1 {
                            (false, dst_data.rev(), pre_body_ccode, decl_dstptr_ccode_all + body_ccode)
                        } else {
                            (true, dst_data.rev(), decl_dstptr_ccode_all + pre_body_ccode, body_ccode)
                        }
                    }
                    val (result, body_ccode) = kexp2cexp(body, ref None, body_ccode)
                    /* [TODO] if the result is temporarily created value, then it would be more efficient
                               to "move" it to the output collection instead of copying it there:
                        1. the result should be a local variable that is defined in the body prologue
                            (which means that it's temporary and it's complex).
                            Note that it is compile-time check, not runtime check.
                            If the result is not complex, we don't call destructor and use simple '=' operator,
                            no need to bother with move.
                        2. we update C_gen_types.get_ctprops to return move macro
                            (FX_MOVE_STR, FX_MOVE_ARR, FX_MOVE_PTR, FX_MOVE_RECORD
                            (the last one is universal macro)).
                            we use move_macro(result, *dstptr).
                            We still need to retain conditional destructor call for 'result',
                            but it will do nothing (and the check will be perfectly predicted,
                            because it will _always_ do nothing, except for the exceptional situations)
                        3. (optional: we need to extend _fx_cons_L...() implementation:
                            add `bool move_hd` parameter), otherwise we could use this 'move' trick
                            only with array and vector comprehensions.
                    */
                    val fold body_ccode = body_ccode for (coll_ctyp, elemtyp, dst_exp, dst_ptr, iter)@j <- dst_data {
                        val result_j =
                            if !unzip_mode {
                                result
                            } else {
                                val (_, result, relems, ofs) = get_struct(result)
                                val nelems = relems.length()
                                if j < 0 || j + ofs >= nelems {
                                    throw compile_err(body_loc, f"cgen: the tuple/record element index {j} is out of range [:: 0, {nelems}]")
                                }
                                val (j_id, _) = relems.nth(j + ofs)
                                cexp_mem(result, j_id, elemtyp)
                            }
                        match for_flag_make {
                        | ForMakeArray =>
                            C_gen_types.gen_copy_code(result_j, cexp_deref(dst_ptr), elemtyp, body_ccode, body_loc)
                        | ForMakeVector =>
                            val write_f = match C_gen_types.get_copy_f(elemtyp, true, false, body_loc) {
                                          | (_, Some _) => get_id("FX_RRB_WRITE")
                                          | _ => get_id("FX_RRB_WRITE_FAST")
                                          }
                            val lbl = curr_block_label(body_loc)
                            val write_call = make_call(write_f, [:: CExpTyp(elemtyp, body_loc),
                                                iter, dst_ptr, result_j, lbl], CTypVoid, body_loc)
                            (CExp(write_call) :: body_ccode)
                        | ForMakeList =>
                            val (node_exp, body_ccode) =
                                create_cdefval(gen_idc(cm_idx, "node"), coll_ctyp, default_tempval_flags(), "",
                                               Some(make_nullptr(body_loc)), body_ccode, body_loc)
                            val body_ccode = make_cons_call(result_j, make_nullptr(body_loc), false, node_exp, body_ccode, body_loc)
                            val append_call = make_call(std_FX_LIST_APPEND, [:: dst_exp, iter, node_exp ], CTypVoid, body_loc)
                            (CExp(append_call) :: body_ccode)
                        | _ => throw compile_err(body_loc, "unsupported kind of comprehension (only arrays, vectors and lists are supported)")
                        }
                    }
                    (add_incr_dstptr && need_make_array, dst_data, pre_body_ccode, (pre_map_ccode, body_ccode))
                | _ => (false, dst_data, pre_body_ccode, form_map(pre_map_ccode, for_idx + 1, nested_e_idoml, i_exps, n_exps))
                }
                /* add the initialization and the cleanup sections, if needed */
                val (br_label, body_stmt) = finalize_loop_body(body_ccode, !need_make_array, kloc)

                /* form (possibly nested) for statement */
                val nfor_headers = for_headers.length()
                val fold for_ccode = stmt2ccode(body_stmt).rev()
                    for (t_opt, for_inits, for_check_opt, for_incrs)@k <- for_headers.rev() {
                    val for_incrs =
                        if k > 0 || !add_incr_dstptr { for_incrs }
                        else {
                            for_incrs + [:: for (_, _, _, dst_ptr, _) <- dst_data {
                                         CExpUnary(COpSuffixInc, dst_ptr, (CTypVoid, for_loc)) }]
                        }
                    val insert_pragma = for_idx == 0 && is_parallel_map && k + 1 == nfor_headers
                    val for_body_ccode = if !insert_pragma { for_ccode }
                                         else { for_ccode + decl_nested_status }
                    val (t_opt, init_t_opt) =
                        match (t_opt, insert_pragma, post_ccode) {
                        | (Some _, false, _ :: _) => (None, t_opt)
                        | _ => (t_opt, None)
                        }
                    val for_ccode =
                        match init_t_opt {
                        | Some t =>
                            fold for_ccode = [] for e <- for_inits {
                                match e {
                                | CExpBinary (COpAssign, CExpIdent (i, (_, loc_i)), _, _) => CDefVal(t, i, None, loc_i) :: for_ccode
                                | e => throw compile_err(get_cexp_loc(e), "invalid expression in the for-loop initialization part (should be i=<exp0>)")
                                }
                            }
                        | _ => []
                        }
                    val for_ccode = CStmtFor(t_opt, for_inits, for_check_opt, for_incrs,
                                            rccode2stmt(for_body_ccode, kloc), kloc) :: for_ccode
                    val for_ccode = if !insert_pragma { for_ccode }
                                    else { for_ccode + [:: CMacroPragma("omp parallel for", kloc) ] }
                    val for_ccode = if k > 0 || pre_body_ccode == [] { for_ccode }
                                    else { for_ccode + pre_body_ccode }
                    for_ccode
                }
                /* add the non-local "break" label if needed */
                val post_ccode = if br_label == noid { post_ccode }
                                 else { CStmtLabel(br_label, end_for_loc) :: post_ccode }
                (pre_map_ccode, post_ccode + (for_ccode + init_ccode))
            }

            val (pre_map_ccode, map_ccode) = form_map([], 0, e_idoml_l + [:: (body, [], [])], [], [])
            val map_ccode =
                if !is_parallel_map { map_ccode }
                else {
                    val update_exn_parallel = make_call(get_id("FX_UPDATE_EXN_PARALLEL"),
                                                        [:: par_status, map_lbl], CTypVoid, kloc)
                    CExp(update_exn_parallel) :: map_ccode
                }
            val map_ccode = finalize_ccode + map_ccode
            val map_ccode =
                if !unzip_mode {
                    map_ccode
                } else {
                    val tcon = C_gen_types.get_constructor(ctyp, false, kloc)
                    val cargs = [:: for (_, _, dst_exp, _, _) <- dst_data {
                                    make_fun_arg(dst_exp, kloc)
                                }]
                    val (t_exp, map_ccode) = get_dstexp(dstexp_r, "tup", ctyp, map_ccode, kloc)
                    val call_mktup = make_call(tcon, cargs + [:: cexp_get_addr(t_exp)], CTypVoid, kloc)
                    CExp(call_mktup) :: map_ccode
                }
            (false, dummy_exp, map_ccode + pre_map_ccode + ccode)
        | KExpFor (idoml, at_ids, body, flags, _) =>
            /* for-loop used as a statement: it is converted to a (possibly nested) C 'for' statement.
               When flags.for_flag_parallel is set, an "omp parallel for" pragma is emitted next to the
               loop and FX_UPDATE_EXN_PARALLEL(par_status, lbl) is emitted next to the post-loop code.
               The arm always returns dummy_exp, since the loop yields no value. */
            val lbl = curr_block_label(kloc)
            val for_loc = get_start_loc(kloc)
            val end_for_loc = get_end_loc(kloc)
            val ndims = compute_for_ndims(0, 1, idoml, for_loc)
            val glob_status = make_id_t_exp(fx_status_, CTypCInt, kloc)
            val is_parallel_for = flags.for_flag_parallel
            /* for a parallel loop two extra int values, both initialized with 0, are defined:
                 - par_status: its definition is added to the enclosing 'ccode';
                 - nested_status: its definition is kept aside in decl_nested_status
                   and is put into the loop body below.
               For a non-parallel loop glob_status is used as the nested status
               and no extra declarations are produced. */
            val (par_status, ccode, nested_status, decl_nested_status) =
            if is_parallel_for {
                val (par_status, ccode) = create_cdefval(gen_idc(cm_idx, "par_status"), CTypCInt,
                                                        default_tempvar_flags(), "",
                                                        Some(make_int_exp(0, kloc)), ccode, kloc)
                val (nested_status, decl_nested_status) =
                    create_cdefval(gen_idc(cm_idx, "status"), CTypCInt, default_tempvar_flags(),
                                    "fx_status", Some(make_int_exp(0, kloc)), [], kloc)
                (par_status, ccode, nested_status, decl_nested_status)
            } else {
                (make_dummy_exp(kloc), ccode, glob_status, [])
            }
            /* build the for-loop headers; this is done before the loop block context is created.
               NOTE(review): judging by their use below, pre_body_ccode is the code combined with
               the for statement that wraps the body directly, body_elems describe the per-iteration
               values and post_ccode is the code emitted next to the loop - confirm against process_for() */
            val (for_headers, _, _, _, ccode, pre_body_ccode, body_elems, post_ccode) =
            process_for(lbl, idoml, at_ids, 0, 1, ndims, 0, [:: (body, [], [])], ccode, kloc)
            new_for_block_ctx(ndims, flags, nested_status, par_status, kloc)
            /* in the parallel case the body starts with the declaration of the nested status */
            val body_ccode = if is_parallel_for { decl_nested_status }
                             else { [] }
            /* inside the loop body context form `<etyp> v=e` expressions (or more complex ones in the case of complex types) */
            val body_ccode = decl_for_body_elems(body_elems, body_ccode)
            /* add the loop body itself */
            val (_, body_ccode) = kexp2cexp(body, ref None, body_ccode)
            val body_loc = get_kexp_loc(body)
            /* add the initialization and the cleanup sections, if needed */
            val (br_label, body_stmt) = finalize_loop_body(body_ccode, true, body_loc)

            /* form (possibly nested) for statement: the headers are processed in reverse order and
               each one wraps the statement built so far; at k == 0 (the header that wraps the body
               directly) the for statement is combined with pre_body_ccode, if there is any */
            val fold for_stmt = body_stmt for (t_opt, for_inits, for_check_opt, for_incrs)@k <- for_headers.rev() {
                val for_stmt = CStmtFor(t_opt, for_inits, for_check_opt, for_incrs, for_stmt, kloc)
                if k > 0 || pre_body_ccode == [] {
                    for_stmt
                } else {
                    rccode2stmt(for_stmt :: pre_body_ccode, for_loc)
                }
            }
            /* add the non-local "break" label if needed */
            val post_ccode = if br_label == noid { post_ccode }
                             else { CStmtLabel(br_label, end_for_loc) :: post_ccode }
            /* in the parallel case add the OpenMP pragma and the FX_UPDATE_EXN_PARALLEL() call */
            val (omp_pragma, post_ccode) =
                if !is_parallel_for {
                    ([], post_ccode)
                } else {
                    val update_exn_parallel = make_call(get_id("FX_UPDATE_EXN_PARALLEL"),
                                                        [:: par_status, lbl], CTypVoid, kloc)
                    ([::CMacroPragma("omp parallel for", kloc) ],
                    CExp(update_exn_parallel) :: post_ccode)
                }
            /* add it all to ccode; nothing to return/assign, since "for-loop" is "void" expression */
            (false, dummy_exp, post_ccode + ([::for_stmt ] + (omp_pragma + ccode)))
        | KExpWhile (c, body, _) =>
            /* while-loop. The condition is compiled first, inside the new loop context.
               There are 3 cases:
                 - the condition needs no extra statements and is literal 'true': for(;;) {body}
                 - the condition needs no extra statements: while(cond) {body}
                 - the condition needs extra statements: for(;;) {<cond statements>; if(!cond) break; body}
               NOTE(review): code lists appear to be accumulated newest-first (cf. 'stmt :: ccode'),
               so the check code put at the tail of the list runs before the body - confirm. */
            new_block_ctx(BlockKind_Loop, kloc)
            val (cond_exp, cond_ccode) = kexp2cexp(c, ref None, [])
            val (use_endless_for, precheck_ccode) =
                match cond_ccode {
                | [] =>
                    match cond_exp {
                    | CExpLit (KLitBool(true), _) => (true, [])
                    | _ => (false, [])
                    }
                | _ =>
                    /* the condition cannot be put into the loop header; break out explicitly */
                    val cond_loc = get_cexp_loc(cond_exp)
                    val neg_cond = CExpUnary(COpLogicNot, cond_exp, (CTypBool, cond_loc))
                    val exit_stmt = make_break_stmt(cond_loc)
                    val exit_check = make_if(neg_cond, exit_stmt, CStmtNop(cond_loc), cond_loc)
                    (true, exit_check :: cond_ccode)
                }
            val (_, loop_body_ccode) = kexp2cexp(body, ref None, [])
            val (_, body_stmt) = finalize_loop_body(loop_body_ccode + precheck_ccode, true, kloc)
            val loop_stmt =
                if use_endless_for { CStmtFor(None, [], None, [], body_stmt, kloc) }
                else { CStmtWhile(cond_exp, body_stmt, kloc) }
            (false, dummy_exp, loop_stmt :: ccode)
        | KExpDoWhile (body, c, _) =>
            /* do-while loop. The body is compiled first, then the condition (both inside the new loop context).
               There are 3 cases:
                 - the condition needs no extra statements and is literal 'true': for(;;) {body}
                 - the condition needs no extra statements: do {body} while(cond)
                 - the condition needs extra statements: for(;;) {body; <cond statements>; if(!cond) break;}
               NOTE(review): code lists appear to be accumulated newest-first (cf. 'stmt :: ccode'),
               so the check code put at the head of the list runs after the body - confirm. */
            new_block_ctx(BlockKind_Loop, kloc)
            val (_, loop_body_ccode) = kexp2cexp(body, ref None, [])
            val (cond_exp, cond_ccode) = kexp2cexp(c, ref None, [])
            val (use_endless_for, postcheck_ccode) =
                match cond_ccode {
                | [] =>
                    match cond_exp {
                    | CExpLit (KLitBool(true), _) => (true, [])
                    | _ => (false, [])
                    }
                | _ =>
                    /* the condition cannot be put into the loop footer; break out explicitly */
                    val cond_loc = get_cexp_loc(cond_exp)
                    val neg_cond = CExpUnary(COpLogicNot, cond_exp, (CTypBool, cond_loc))
                    val exit_stmt = make_break_stmt(cond_loc)
                    val exit_check = make_if(neg_cond, exit_stmt, CStmtNop(cond_loc), cond_loc)
                    (true, exit_check :: cond_ccode)
                }
            val (_, body_stmt) = finalize_loop_body(postcheck_ccode + loop_body_ccode, true, kloc)
            val loop_stmt =
                if use_endless_for { CStmtFor(None, [], None, [], body_stmt, kloc) }
                else { CStmtDoWhile(body_stmt, cond_exp, kloc) }
            (false, dummy_exp, loop_stmt :: ccode)
        | KExpCCode (ccode_str, _) =>
            /* inline C code used as an expression: accepted only when the current block context
               is the global one; the fragment is put on top of top_inline_ccode and
               nothing is added to the current ccode */
            val bctx = curr_block_ctx(kloc)
            if bctx->bctx_kind != BlockKind_Global {
                throw compile_err(kloc, "cgen: unexpected ccode expression")
            }
            val inline_stmt = CExp(CExpCCode(ccode_str, kloc))
            top_inline_ccode = inline_stmt :: top_inline_ccode
            (false, dummy_exp, ccode)
        | KDefVal (i, e2, _) =>
            /* value definition 'i = e2'. Depending on the flags and the type of 'i' and on the form
               of 'e2' the definition is either dropped (i is replaced with the converted e2),
               put into the prologue of the current block context, or added to the current 'ccode'.
               The arm always returns dummy_exp. */
            val {kv_typ, kv_cname, kv_flags} = get_kval(i, kloc)
            /* global values and constructor values are registered as defined symbols */
            if is_val_global(kv_flags) || kv_flags.val_flag_ctor > 0 {
                defined_syms.add(i)
            }
            val {ktp_ptr, ktp_complex, ktp_scalar} = K_annotate.get_ktprops(kv_typ, kloc)
            val ctyp = C_gen_types.ktyp2ctyp(kv_typ, kloc)
            val bctx = curr_block_ctx(kloc)
            /* detect the case when the value is initialized with inline C code */
            val (ccode_data_kind, ccode_data_lit, ccode_loc) =
                match e2 {
                | KExpCCode (c, (_, l)) => ("ccode", c, l)
                | _ => ("", "", kloc)
                }
            val ctor_id = kv_flags.val_flag_ctor
            val is_temp = kv_flags.val_flag_temp
            val is_temp_ref = kv_flags.val_flag_tempref
            /* non-temporary value defined in the global block context */
            val is_global = bctx->bctx_kind == BlockKind_Global && !is_temp && !is_temp_ref
            /* temporary value of the form 'a :: l' where 'l' belongs to u1vals
               and is itself a temporary value
               (NOTE(review): u1vals is presumably the set of values that are used just once - confirm) */
            val is_fast_cons = is_temp &&
                (match e2 {
                | KExpBinary (OpCons, a, AtomId l, _) when u1vals.mem(l) =>
                    match kinfo_(l, kloc) {
                    | KVal ({kv_flags}) => kv_flags.val_flag_temp
                    | _ => false
                    }
                | _ => false
                })
            /* there are 3 major cases (ce2 denotes e2 converted to C):
                1. definition "ctyp i = ce2" is not added; instead, i is replaced with ce2.
                2. i is defined separately: "ctyp i[={}|0];" and then
                   expression is compiled with 'i' as the destination.
                3. i is defined and initialized at once: "ctyp i=ce2;"
            */
            val ccode =
                if ccode_data_kind == "ccode" {
                    /* the value is initialized with inline C code:
                       its definition goes to the prologue of the current block context */
                    val (_, delta_ccode) =
                        create_cdefval(i, ctyp, kv_flags, "",
                            Some(CExpCCode(ccode_data_lit, ccode_loc)), [], kloc)
                    bctx->bctx_prologue = delta_ccode + bctx->bctx_prologue
                    ccode
                } else if ctor_id > 0 {
                    /* the value is a constructor value (val_flag_ctor > 0); ctor_id is used as its tag */
                    val tag_exp = make_int_exp(ctor_id, kloc)
                    /* true when the value type is a variant that has both
                       var_flag_recursive and var_flag_opt flags set */
                    val is_null =
                        match kv_typ {
                        | KTypName tn =>
                            match kinfo_(tn, kloc) {
                            | KVariant (ref {kvar_flags}) =>
                                kvar_flags.var_flag_recursive && kvar_flags.var_flag_opt
                            | _ => false
                            }
                        | _ => false
                        }
                    /* choose the initializer:
                         - non-pointer type: the "{tag}" initializer;
                         - 'is_null' case: the null pointer;
                         - otherwise: the address of a private data structure
                           that is initialized with "{1, tag}" */
                    val (init_exp, delta_ccode) =
                        if !ktp_ptr {
                            val init_exp = CExpInit([:: tag_exp], (ctyp, kloc))
                            (init_exp, [])
                        } else if is_null {
                            (make_nullptr(kloc), [])
                        } else {
                            /* temporarily put (i, ctyp) into the value table */
                            val (i_exp, _) = create_cdefval(i, ctyp, default_tempval_flags(), "", None, [], kloc)
                            val (rn, _, _, _) = get_struct(i_exp)
                            val struct_ctyp = CTypName(rn)
                            val data_id = gen_idc(cm_idx, pp(i) + "_data")
                            val rc_exp = make_int_exp(1, kloc)
                            val data_init = CExpInit([:: rc_exp, tag_exp], (struct_ctyp, kloc))
                            val (data_exp, delta_ccode) =
                            create_cdefval(data_id, struct_ctyp, default_val_flags().{val_flag_private=true},
                                            "", Some(data_init), [], kloc)
                            (cexp_get_addr(data_exp), delta_ccode)
                        }
                    val (_, delta_ccode) = create_cdefval(i, ctyp, default_val_flags().{val_flag_global=mod_sc},
                                                          "", Some(init_exp), delta_ccode, kloc)
                    /* just put initialization into the global scope, no destructors are needed */
                    bctx->bctx_prologue = delta_ccode + bctx->bctx_prologue
                    ccode
                } else if is_fast_cons || (is_temp_ref || ktp_scalar && is_temp) && u1vals.mem(i) {
                    /* case 1: no definition is emitted; 'i' is mapped to the converted e2 via i2e */
                    val (ce2, ccode) = kexp2cexp(e2, ref None, ccode)
                    /* we still need to declare i to be able to access its type */
                    val _ = create_cdefval(i, ctyp, kv_flags, "", None, [], kloc)
                    i2e.add(i, ce2)
                    ccode
                } else if is_temp_ref {
                    /* temporary reference that was not eliminated by the previous branch */
                    val (ce2, ccode) = kexp2cexp(e2, ref None, ccode)
                    val (_, ccode) = add_local_tempref(i, ctyp, kv_flags, ce2, ccode, kloc)
                    ccode
                } else if ktp_complex || is_global ||
                            (match e2 {
                            | KExpAtom _ | KExpBinary _ | KExpUnary _ | KExpIntrin _ | KExpMkTuple _
                            | KExpMkRecord _ | KExpAt _ | KExpMem _ | KExpCast _ | KExpCCode _ => false
                            | _ => true
                            }) {
                    /* case 2: 'i' is defined first and then e2 is compiled with 'i' as the destination.

                    Disable the i=e2 assignment if i has a complex type and e2 is "Nil".
                    If i is complex, it will be initialized anyway with "0" or "{}",
                    so there is no need to re-initialize it once again.

                    The cleanup list of the block context is saved and reset here, so that
                    the cleanup code added by add_local() below can be handled separately */
                    val saved_cleanup = bctx->bctx_cleanup
                    bctx->bctx_cleanup = []
                    val assign_e2 = match (ktp_complex, e2) {
                                    | (true, KExpAtom (AtomLit(KLitNil _), _)) => false
                                    | _ => true
                                    }
                    val (flags, e0_opt, assign_e2) =
                        if !is_global {
                            (kv_flags, None, assign_e2)
                        } else {
                            /* if a global value/variable is initialized with a constant,
                            we just use this constant for its initialization instead of
                            setting it to "0" and reassigning inside fx_init() */
                            val (e0_opt, assign_e2) =
                            if ktp_complex {
                                (None, assign_e2)
                            } else if ktp_ptr || ktp_scalar {
                                match e2 {
                                | KExpAtom (AtomLit l, (e2_ktyp, e2_loc)) =>
                                    val e2_ctyp = C_gen_types.ktyp2ctyp(e2_ktyp, e2_loc)
                                    (Some(CExpLit(l, (e2_ctyp, e2_loc))), false)
                                | _ => (None, assign_e2)
                                }
                            } else {
                                (None, assign_e2)
                            }
                            (kv_flags, e0_opt, assign_e2)
                        }
                    val (i_exp, delta_ccode) = add_local(i, ctyp, flags, e0_opt, [], kloc)
                    /* the definition of a global value goes to the prologue of the block context,
                       the definition of a local value goes to the current ccode */
                    val ccode =
                        if is_global {
                            bctx->bctx_prologue = delta_ccode + bctx->bctx_prologue
                            ccode
                        } else {
                            delta_ccode + ccode
                        }
                    /* for a global value the cleanup code collected since the reset above is moved
                       to module_cleanup and the saved cleanup list is restored;
                       for a local value the new cleanup code is put in front of the saved one */
                    if is_global {
                        module_cleanup = bctx->bctx_cleanup + module_cleanup
                        bctx->bctx_cleanup = saved_cleanup
                    } else {
                        bctx->bctx_cleanup = bctx->bctx_cleanup + saved_cleanup
                    }
                    if assign_e2 {
                        val (_, ccode) = kexp2cexp(e2, ref Some(i_exp), ccode)
                        /* if the generated code has the assignment "j = e" on top of the definition
                           of the same identifier without an initializer,
                           the two are merged into a single definition with the initializer 'e' */
                        match ccode {
                        | (CExp(CExpBinary (COpAssign, CExpIdent (j, _), e, (_, loc))) ::
                            CDefVal (t, i, None, _) :: rest) when j == i =>
                            CDefVal(t, i, Some(e), loc) :: rest
                        | _ => ccode
                        }
                    } else {
                        ccode
                    }
                } else {
                    /* case 3: 'i' is defined and initialized at once.
                       'Nil' literal of a non-pointer type is converted to the empty initializer */
                    val (ce2, ccode) =
                        match (ktp_ptr, e2) {
                        | (false, KExpAtom (AtomLit(KLitNil _), (_, loc))) => (CExpInit([], (ctyp, loc)), ccode)
                        | _ => kexp2cexp(e2, ref None, ccode)
                        }
                    /* when e2 constructs a record or a tuple and the generated code has a definition
                       with an initializer on top, that definition is dropped and
                       its initializer is used to initialize 'i' directly */
                    val (_, ccode) =
                        match (e2, ccode) {
                        | (KExpMkRecord _, CDefVal (t, tval, Some rhs, loc) :: rest) =>
                            add_local(i, ctyp, kv_flags, Some(rhs), rest, kloc)
                        | (KExpMkTuple _, CDefVal (t, tval, Some rhs, loc) :: rest) =>
                            add_local(i, ctyp, kv_flags, Some(rhs), rest, kloc)
                        | _ => add_local(i, ctyp, kv_flags, Some(ce2), ccode, kloc)
                        }
                    ccode
                }
                /* if the K-form value has a non-empty kv_cname, store it as cv_cname of the C value */
                if kv_cname != "" {
                    match cinfo_(i, kloc) {
                    | CVal cv =>
                        val cv = cv.{cv_cname=kv_cname}
                        set_idc_entry(i, CVal(cv))
                    | _ => {}
                    }
                }
            (false, dummy_exp, ccode)
        | KDefFun kf =>
            /*
                generate new context.
                generate ccode for the body with dstid=fx_result.
                add the prologue and the cleanup sections to the generated c code.
                before cleanup possibly insert a label if needed.
                prior to the cleanup save the output expression if needed
                (because it may use elements that will be released)
                after the cleanup section add "return fx_status;" if needed, or
                "return ret_exp;" if the function is nothrow and is not void.

                handle the case of 'c code'-body separately
            */
            val {kf_name, kf_rt, kf_closure, kf_body, kf_cname, kf_flags, kf_loc} = *kf
            val {kci_arg, kci_fcv_t} = kf_closure
            val ctor = kf_flags.fun_flag_ctor
            if kci_arg != noid {
                ensure_sym_is_defined_or_declared(kf_name, kf_loc)
            }
            defined_syms.add(kf_name)
            new_block_ctx(BlockKind_Fun(kf_name), kloc)
            val (args, rt, is_nothrow, cf) =
            match cinfo_(kf_name, kf_loc) {
            | CFun((ref {cf_args, cf_flags, cf_rt}) as cf) =>
                val is_nothrow = cf_flags.fun_flag_nothrow
                (cf_args, cf_rt, is_nothrow, cf)
            | _ =>
                throw compile_err(kf_loc,
                    f"cgen: the function '{idk2str(kf_name, kf_loc)}' declaration was not properly converted")
            }
            val (real_args, retid, _, _) = unpack_fun_args(args, rt, is_nothrow)
            val really_nothrow = ref false
            val nreal_args = real_args.length()
            /* in the list of parameters the return value (if any) can be the last one
               (in the case of no-throw functions) or the second-to-last one
               (in the case of functions that may throw exceptions)
            */
            val new_body =
            match (kf_body, ctor) {
            /* C function */
            | (KExpCCode (code, (_, loc)), CtorNone) => [:: CExp(CExpCCode(code, loc))]
            /* generic function */
            | (_, CtorNone) =>
                val dstexp_r = ref (if retid == noid { None }
                                    else { Some(cexp_deref(make_id_exp(retid, kf_loc))) })
                func_dstexp_r = dstexp_r
                return_used = 0
                val orig_status_id = gen_idc(cm_idx, "fx_status")
                val (status_exp, ccode) = create_cdefval(orig_status_id, CTypCInt, default_tempvar_flags(),
                                                        "fx_status", Some(make_int_exp(0, kf_loc)), [], kf_loc)
                val status_id = if is_nothrow { noid } else { orig_status_id }
                val ccode =
                    if status_id == noid || !kf_flags.fun_flag_recursive {
                        ccode
                    } else {
                        val call_chkstk = make_call(get_id("fx_check_stack"), [], CTypCInt, kf_loc)
                        add_fx_call(call_chkstk, ccode, kf_loc)
                    }
                    for (a, t, flags) <- real_args {
                        if flags.mem(CArgPassByPtr) {
                            i2e.add(a, cexp_deref(make_id_exp(a, kf_loc)))
                        }
                    }
                val ccode =
                    if kci_arg == noid {
                        ccode
                    } else {
                        val fcv_ptr_ctyp = make_ptr(CTypName(kci_fcv_t))
                        val fcv_arg_exp0 = make_id_t_exp(get_id("fx_fv"), std_CTypVoidPtr, kf_loc)
                        val cast_ptr = CExpCast(fcv_arg_exp0, fcv_ptr_ctyp, kf_loc)
                        val (_, ccode) = create_cdefval(kci_arg, fcv_ptr_ctyp, default_tempval_flags(), "", Some(cast_ptr), ccode, kf_loc)
                        ccode
                    }
                val (ret_e, ccode) = kexp2cexp(kf_body, dstexp_r, ccode)
                val end_loc = get_kexp_end(kf_body)
                val bctx = curr_block_ctx(end_loc)
                /* exclude FX_CHECK_EXN() if it's in the very end of function */
                val ccode =
                    match ccode {
                    | (CExp(CExpCall (CExpIdent (f, _), _, _)) :: rest) when f == std_FX_CHECK_EXN =>
                        bctx->bctx_label_used = bctx->bctx_label_used - 1
                        rest
                    | _ => ccode
                    }
                val {bctx_label, bctx_prologue, bctx_cleanup, bctx_label_used} = *bctx
                val ccode =
                    if bctx_label_used > 0 || return_used > 0 {
                        CStmtLabel(bctx_label, end_loc) :: ccode
                    } else {
                        /* [TODO] it seems that the check is quite weak and yields wrong code.
                        Currently 'really_nothrow' is never set. Need some better check.

                        the use of the final "cleanup" label in the end of function is a good indicator
                        of whether the function may throw exceptions (or propagate exceptions from the called functions)
                        or not. In some cases the label might be used but the function does not throw exceptions.
                        But not vice versa. So we play safe here.*/
                        /*match ccode {
                        | CStmtLabel _ :: _ => {}
                        | _ => *really_nothrow = true
                        }*/
                        ccode
                    }
                val (ret_e, ccode) =
                    if bctx_cleanup == [] {
                        (ret_e, ccode)
                    } else {
                        val (ret_e, ccode) =
                        if retid != noid || status_id != noid || dstexp_r->issome() {
                            (ret_e, ccode)
                        } else {
                            match ret_e {
                            | CExpInit ([], _) | CExpLit _ | CExpIdent _ => (ret_e, ccode)
                            | _ => create_cdefval(gen_idc(cm_idx, "result"), get_cexp_typ(ret_e),
                                                  default_tempval_flags(), "", Some(ret_e), ccode, end_loc)
                            }
                        }
                        (ret_e, bctx_cleanup + ccode)
                    }
                val ccode =
                    if status_id != noid {
                        if return_used > 0 {
                            val chk_ret = make_call(get_id("FX_CHECK_RETURN"), [], CTypCInt, end_loc)
                            CStmtReturn(Some(chk_ret), end_loc) :: ccode
                        } else {
                            CStmtReturn(Some(status_exp), end_loc) :: ccode
                        }
                    } else if rt == CTypVoid {
                        ccode
                    } else {
                        CStmtReturn(Some(ret_e), end_loc) :: ccode
                    }
                bctx_prologue.rev() + ccode.rev()
            /* recursive or non-recursive variant constructor */
            | (_, CtorVariant tag_value) =>
                val (have_tag, is_recursive_variant, ifaces_id) =
                    match kf_rt {
                    | KTypName vn =>
                        match kinfo_(vn, kf_loc) {
                        | KVariant (ref {kvar_flags}) =>
                            val have_tag = kvar_flags.var_flag_have_tag
                            val is_recursive = kvar_flags.var_flag_recursive
                            val ifaces_id = match cinfo_(vn, kf_loc) {
                                            | CTyp (ref {ct_ifaces_id}) => ct_ifaces_id
                                            | _ => noid
                                            }
                            (have_tag, is_recursive, ifaces_id)
                        | _ =>
                            throw compile_err(kf_loc,
                                f"cgen: the return type of variant constructor {idk2str(kf_name, kf_loc)} is not variant")
                        }
                    | _ =>
                        throw compile_err(kf_loc,
                            f"cgen: the return type of variant constructor {idk2str(kf_name, kf_loc)} is not variant")
                    }
                val var_exp = make_id_exp(retid, kf_loc)
                val result_ctyp = C_gen_types.ktyp2ctyp(kf_rt, kf_loc)
                val (var_exp, ccode, ret_ccode) =
                if is_recursive_variant {
                    val alloc_var = make_call(std_FX_MAKE_RECURSIVE_VARIANT_IMPL_START, [:: CExpTyp(result_ctyp, kf_loc) ], CTypVoid, kf_loc)
                    val (var_exp, _) = create_cdefval(gen_idc(cm_idx, "v"), result_ctyp, default_tempvar_flags(), "v", None, [], kf_loc)
                    val ccode = [:: CExp(alloc_var)]
                    val ccode =
                        if ifaces_id == noid {ccode}
                        else {
                            val ifaces_ctyp = CTypName(get_id("fx_ifaces_t"))
                            val ifaces_ptr_ctyp = make_ptr(ifaces_ctyp)
                            CExp(make_assign(cexp_arrow(var_exp, get_id("ifaces"), ifaces_ptr_ctyp),
                                cexp_get_addr(make_id_t_exp(ifaces_id, ifaces_ctyp, kloc)))) :: ccode
                        }
                    val ret_ccode = [:: CStmtReturn(Some(make_int_exp(0, kf_loc)), kf_loc) ]
                    (var_exp, ccode, ret_ccode)
                } else {
                    (var_exp, [], [])
                }
                val init_tag = make_assign(cexp_arrow(var_exp, get_id("tag"), CTypInt), make_int_exp(tag_value, kloc))
                val ccode = if have_tag { CExp(init_tag) :: ccode } else { ccode }
                val dst_base = cexp_arrow(var_exp, get_id("u"), CTypAny)
                val dst_base = cexp_mem(dst_base, get_orig_id(kf_name), CTypAny)
                val fold ccode = ccode for (a, t, flags)@idx <- real_args {
                    val src_exp = make_id_t_exp(a, t, kf_loc)
                    val (src_exp, t) = maybe_deref_fun_arg(idx, src_exp, t, flags, kf_loc)
                    val dst_exp = dst_base
                    val dst_exp =
                    if nreal_args == 1 {
                        dst_exp
                    } else {
                        val tup_elem = get_id(f"t{idx}")
                        cexp_mem(dst_exp, tup_elem, t)
                    }
                    C_gen_types.gen_copy_code(src_exp, dst_exp, t, ccode, kf_loc)
                }
                (ret_ccode + ccode).rev()
            /* function pointer/closure constructor */
            | (_, CtorFP f_id) =>
                val fcv_t_id =
                    match kinfo_(f_id, kf_loc) {
                    | KFun (ref {kf_closure={kci_fcv_t}}) => kci_fcv_t
                    | _ => throw compile_err(kf_loc, f"cgen: '{get_idk_cname(f_id, kf_loc)}' is not a function")
                    }
                val fcv_t = CTypName(fcv_t_id)
                val free_f_exp =
                    match C_gen_types.get_free_f(fcv_t, true, false, kf_loc) {
                    | (_, Some free_f) => free_f
                    | _ => throw compile_err(kf_loc, f"cgen: missing destructor for closure vars '{get_idk_cname(fcv_t_id, kf_loc)}'")
                    }
                val alloc_fcv = make_call( std_FX_MAKE_FP_IMPL_START, [:: CExpTyp(fcv_t, kf_loc), free_f_exp,
                                           make_id_t_exp(f_id, std_CTypVoidPtr, kloc)], CTypVoid, kf_loc )
                val (fcv_exp, _) = create_cdefval(gen_idc(cm_idx, "fcv"), make_ptr(fcv_t),
                                                  default_tempval_flags(), "fcv", None, [], kf_loc)
                val ret_ccode = [:: CStmtReturn(Some(make_int_exp(0, kf_loc)), kf_loc) ]
                val ccode = [:: CExp(alloc_fcv)]
                val fold ccode = ccode for (a, t, flags)@idx <- real_args {
                    val src_exp = make_id_t_exp(a, t, kf_loc)
                    val (src_exp, t) = maybe_deref_fun_arg(idx, src_exp, t, flags, kf_loc)
                    val fcv_elem = get_id(f"t{idx}")
                    val dst_exp = cexp_arrow(fcv_exp, fcv_elem, t)
                    C_gen_types.gen_copy_code(src_exp, dst_exp, t, ccode, kf_loc)
                }
                (ret_ccode + ccode).rev()
            /* exception constructor */
            | (_, CtorExn exn_id) =>
                val (exn_typ, exn_tag, exn_std, exn_data_id, exn_info) =
                    match cinfo_(exn_id, kf_loc) {
                    | CExn (ref {cexn_typ, cexn_tag, cexn_std, cexn_data, cexn_info}) =>
                        (cexn_typ, cexn_tag, cexn_std, cexn_data, cexn_info)
                    | _ =>
                        throw compile_err( kf_loc,
                            f"cgen: constructor of exception '{idk2str(exn_id, kf_loc)}' is expecting converted KExn=>CExn structure")
                    }
                if exn_std {
                    ccode
                } else {
                    val exn_data_t = CTypName(exn_data_id)
                    val alloc_exn_data =
                    make_call(
                        std_FX_MAKE_EXN_IMPL_START,
                        [:: make_id_t_exp(exn_tag, CTypCInt, kf_loc),
                          CExpTyp(exn_data_t, kf_loc),
                          make_id_t_exp(exn_info, std_fx_exn_info_t, kf_loc)
                        ],
                        CTypVoid,
                        kf_loc)
                    val ret_ccode = [:: CStmtReturn(Some(make_int_exp(0, kf_loc)), kf_loc) ]
                    val exn_data = make_id_t_exp(get_id("exn_data"), make_ptr(exn_data_t), kf_loc)
                    val dst_exp = cexp_arrow(exn_data, get_id("data"), exn_typ)
                    val ccode = [:: CExp(alloc_exn_data)]
                    val fold ccode = ccode for (a, t, flags)@idx <- real_args {
                        val src_exp = make_id_t_exp(a, t, kf_loc)
                        val (src_exp, t) = maybe_deref_fun_arg(idx, src_exp, t, flags, kf_loc)
                        val dst_exp =
                        if nreal_args == 1 {
                            dst_exp
                        } else {
                            val t_elem = get_id(f"t{idx}")
                            cexp_mem(dst_exp, t_elem, t)
                        }
                        C_gen_types.gen_copy_code(src_exp, dst_exp, t, ccode, kf_loc)
                    }
                    (ret_ccode + ccode).rev()
                }
            | _ => throw compile_err(kloc, f"cgen: unsupported type of constructor {kf_cname}: {ctor2str(ctor)}")
            }
            pop_block_ctx(kloc)
            val new_cf_flags = cf -> cf_flags.{fun_flag_really_nothrow=*really_nothrow}
            *cf = cf->{cf_body=filter_out_nops(new_body), cf_flags=new_cf_flags}
            (false, dummy_exp, ccode)
        /* the exceptions are handled in c_gen_fdecls;
           the exceptions' constructors are handled above in KDefFun */
        | KDefExn ke => (false, dummy_exp, ccode)
        | KDefVariant kvar => (false, dummy_exp, ccode) /* handled in c_gen_types */
        | KDefTyp kt => (false, dummy_exp, ccode) /* handled in c_gen_types */
        | KDefInterface ki => (false, dummy_exp, ccode) /* handled in c_gen_types */
        | KDefClosureVars kcv => (false, dummy_exp, ccode) /* handled in c_gen_types */
        }

        if !assign || ctyp == CTypVoid {
            (result_exp, ccode)
        } else {
            match *dstexp_r {
            | Some dst_exp =>
                val skip_copy =
                    match result_exp {
                    | CExpLit (KLitNil _, _) =>
                        val {ctp_ptr} = C_gen_types.get_ctprops(ctyp, kloc)
                        ctp_ptr
                    | _ => false
                    }
                val ccode =
                    if skip_copy { ccode }
                    else { C_gen_types.gen_copy_code(result_exp, dst_exp, ctyp, ccode, kloc) }
                (dst_exp, ccode)
            | _ => (result_exp, ccode)
            }
        }
    }

    /*
        all the global code should be put into fx_init_...() function.
        Let's form its body, starting with the standard `int fx_status = 0;`
    */
    val start_loc = if top_code == [] { noloc }
                    else { get_kexp_loc(top_code.hd()) }
    new_block_ctx(BlockKind_Global, start_loc)
    val (status_exp, ccode) =
    create_cdefval(gen_idc(cm_idx, "fx_status"), CTypCInt, default_tempvar_flags(), "fx_status", Some(make_int_exp(0, start_loc)), [], start_loc)
    /* convert all the code to C. It will automatically update functions bodies */
    val (e, ccode) = kexp2cexp(code2kexp(top_code, start_loc), ref None, ccode)
    pr_verbose(f"\t'{pp(km_name)}' has been translated to C. Finalizing the produced code ...")
    val end_loc = get_cexp_loc(e)
    /* bctx_prologue will contain all the global definitions.
       bctx_cleanup will contain destructor calls for all the global definitions.
       Need to add it to end of fx_deinit_...() and form its body
    */
    val {bctx_prologue, bctx_label, bctx_cleanup, bctx_label_used} = *curr_block_ctx(end_loc)
    val fold global_vars = [], temp_init_vals = [] for s <- bctx_prologue {
        val is_global =
        match s {
        | CDefVal (_, i, _, loc) =>
            match cinfo_(i, loc) {
            | CVal ({cv_flags}) => !(cv_flags.val_flag_temp || cv_flags.val_flag_tempref)
            | _ => true
            }
        | _ => true
        }
        if is_global { (s :: global_vars, temp_init_vals) }
        else { (global_vars, s :: temp_init_vals) }
    }
    pop_block_ctx(end_loc)
    val ccode = match e {
                | CExpIdent _ | CExpLit _ => ccode
                | _ => cexp2stmt(e) :: ccode
                }
    val ccode = if bctx_label_used == 0 { ccode }
                else { CStmtLabel(bctx_label, end_loc) :: ccode }
    val ccode = filter_out_nops(ccode)
    val ccode = bctx_cleanup + ccode
    val deinit_ccode = module_cleanup
    val ccode = CStmtReturn(Some(status_exp), end_loc) :: ccode
    val km_cname = K_mangle.mangle_mname(km_cname)
    val init_cname = "fx_init_" + km_cname
    val init_name = gen_idc(cm_idx, init_cname)
    val init_f = ref (cdeffun_t {
            cf_name=init_name, cf_args=[],
            cf_rt=CTypCInt, cf_cname=init_cname,
            cf_body=temp_init_vals + (mod_init_calls + ccode.rev()),
            cf_flags=default_fun_flags(), cf_scope=[],
            cf_loc=end_loc })
    val deinit_cname = "fx_deinit_" + km_cname
    val deinit_name = gen_idc(cm_idx, deinit_cname)
    val deinit_f = ref (cdeffun_t {
            cf_name=deinit_name, cf_args=[], cf_rt=CTypVoid,
            cf_cname=deinit_cname, cf_body=deinit_ccode.rev(),
            cf_flags=default_fun_flags(), cf_scope=[],
            cf_loc=end_loc })
    set_idc_entry(init_name, CFun(init_f))
    set_idc_entry(deinit_name, CFun(deinit_f))
    val mod_names = if km_main { km_cname :: [:: for {cmod_cname} <- cmods { cmod_cname } ] }
                    else { [] }
    val all_ccode_prologue = gen_ccode_prologue(km_main, start_loc) + top_inline_ccode.rev()
    val all_ccode = global_vars + fwd_fdecls.rev() + glob_data_ccode.rev() +
                    c_fdecls + [:: CDefFun(init_f), CDefFun(deinit_f) ] +
                    gen_main(km_main, mod_names, end_loc)
    (all_ccode_prologue, all_ccode)
}

/*
    Translates a list of K-form modules into the list of C-form modules.

    The work is done in three steps:
        1. the types of all the modules are converted at once
           (C_gen_types.convert_all_typs);
        2. for every module its function declarations are converted
           (C_gen_fdecls.convert_all_fdecls);
        3. every module is converted to C code (gen_ccode), the type
           declarations that the module does not use are dropped
           (C_gen_types.elim_unused_ctypes) and the final cmodule_t is formed.

    The produced modules are returned in the same order as 'kmods'.
*/
fun gen_ccode_all(kmods: kmodule_t list): cmodule_t list
{
    /* step 1: types from all the modules are converted to C in a single call */
    val (ctypes_fwd, ctypes_decls, ctypes_funcs) = C_gen_types.convert_all_typs(kmods)
    pr_verbose("\ttypes definitions have been translated to C")

    /* step 2: function declarations.
       Every module is stored together with a snapshot of the exception data
       declarations accumulated from the modules processed before it;
       the snapshot is taken before the module's own declarations are
       prepended to the accumulator.
       NOTE(review): the module's own 'mod_exn_data_decls' are therefore
       not part of its snapshot - presumably they reach the module
       some other way; confirm. */
    val (mod_entries_rev, _) =
        fold entries = [], exn_decls_acc = ([]: ccode_t) for km <- kmods {
            val {km_idx, km_top} = km
            val (fdecls, init_calls, own_exn_decls) = C_gen_fdecls.convert_all_fdecls(km_idx, km_top)
            val prior_exn_decls = exn_decls_acc.rev()
            val entry = (km, fdecls, init_calls, prior_exn_decls)
            (entry :: entries, own_exn_decls.rev() + exn_decls_acc)
        }
    pr_verbose("\tfunction declarations and exceptions have been translated to C")

    /* step 3: generate the code of each module.
       'cmods_rev' holds the already produced modules, the latest one first;
       it is passed to gen_ccode() in that form. */
    val fold cmods_rev = [] for (km, fdecls, init_calls, prior_exn_decls) <- mod_entries_rev.rev() {
        val {km_name, km_cname, km_main, km_skip, km_pragmas} = km
        val (prologue, body_ccode) = gen_ccode(cmods_rev, km, fdecls, init_calls)
        /* keep only the type declarations that the module's code refers to */
        val used_ctypes = C_gen_types.elim_unused_ctypes(km_name, ctypes_fwd,
                            ctypes_decls + prior_exn_decls, ctypes_funcs, body_ccode)
        val mangled_cname = K_mangle.mangle_mname(km_cname)
        val cmod = cmodule_t {
            cmod_name=km_name,
            cmod_cname=mangled_cname,
            cmod_ccode=prologue + (used_ctypes + body_ccode),
            cmod_main=km_main,
            cmod_recompile=true,
            cmod_skip=km_skip,
            cmod_pragmas=km_pragmas
        }
        cmod :: cmods_rev
    }
    pr_verbose("\tall modules have been translated")
    /* restore the original order of the modules */
    cmods_rev.rev()
}
